@@ -392,6 +392,10 @@ struct mlx5_cqe {
/* CQE format value. */
#define MLX5_COMPRESSED 0x3
+/* CQ doorbell cmd types. */
+#define MLX5_CQ_DBR_CMD_SOL_ONLY (1 << 24)
+#define MLX5_CQ_DBR_CMD_ALL (0 << 24)
+
/* Action type of header modification. */
enum {
MLX5_MODIFICATION_TYPE_SET = 0x1,
@@ -10,12 +10,16 @@
#include <rte_vdpa.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>
+#include <rte_interrupts.h>
#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
+#define MLX5_VDPA_INTR_RETRIES 256
+#define MLX5_VDPA_INTR_RETRIES_USEC 1000
+
struct mlx5_vdpa_cq {
uint16_t log_desc_n;
uint32_t cq_ci:24;
@@ -55,6 +59,7 @@ struct mlx5_vdpa_priv {
uint32_t eqn;
struct mlx5dv_devx_event_channel *eventc;
struct mlx5dv_devx_uar *uar;
+ struct rte_intr_handle intr_handle;
SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list;
};
@@ -113,4 +118,23 @@ int mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
*/
void mlx5_vdpa_cq_global_release(struct mlx5_vdpa_priv *priv);
+/**
+ * Setup CQE event.
+ *
+ * @param[in] priv
+ * The vdpa driver private structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv);
+
+/**
+ * Unset CQE event.
+ *
+ * @param[in] priv
+ * The vdpa driver private structure.
+ */
+void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
+
#endif /* RTE_PMD_MLX5_VDPA_H_ */
@@ -4,10 +4,15 @@
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
+#include <fcntl.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_common.h>
+
+#include <mlx5_common.h>
#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"
@@ -78,6 +83,30 @@
memset(cq, 0, sizeof(*cq));
}
+/*
+ * Arm the CQ to request the next completion event from the device.
+ *
+ * Writes the arm sequence number and current consumer index to the CQ
+ * arm doorbell record, then rings the UAR CQ doorbell with the CQ id.
+ * NOTE(review): assumes the caller serializes arming per CQ (callers
+ * here hold cq->sl or arm once at creation) — confirm for new callers.
+ */
+static inline void
+mlx5_vdpa_cq_arm(struct mlx5_vdpa_priv *priv, struct mlx5_vdpa_cq *cq)
+{
+	const unsigned int cqe_mask = (1 << cq->log_desc_n) - 1;
+	uint32_t arm_sn = cq->arm_sn << MLX5_CQ_SQN_OFFSET;
+	uint32_t cq_ci = cq->cq_ci & MLX5_CI_MASK & cqe_mask;
+	uint32_t doorbell_hi = arm_sn | MLX5_CQ_DBR_CMD_ALL | cq_ci;
+	/* High word: sequence/cmd/ci; low word: CQ number for the UAR. */
+	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cq->cq->id;
+	uint64_t db_be = rte_cpu_to_be_64(doorbell);
+	uint32_t *addr = RTE_PTR_ADD(priv->uar->base_addr, MLX5_CQ_DOORBELL);
+
+	rte_io_wmb();
+	cq->db_rec[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+	/* The doorbell record must be visible before the UAR write. */
+	rte_wmb();
+#ifdef RTE_ARCH_64
+	*(uint64_t *)addr = db_be;
+#else
+	/* 32-bit arch: emit the 64-bit doorbell as two ordered stores. */
+	*(uint32_t *)addr = db_be;
+	rte_io_wmb();
+	*((uint32_t *)addr + 1) = db_be >> 32;
+#endif
+	/* Advance the sequence number for the next arming. */
+	cq->arm_sn++;
+}
+
int
mlx5_vdpa_cq_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n, int callfd,
struct mlx5_vdpa_cq *cq)
@@ -147,8 +176,108 @@
goto error;
}
}
+ /* First arming. */
+ mlx5_vdpa_cq_arm(priv, cq);
return 0;
error:
mlx5_vdpa_cq_destroy(cq);
return -1;
}
+
+/*
+ * Consume all ready CQEs of a CQ.
+ *
+ * Walks the CQE ring from the current consumer index until a HW-owned
+ * entry is reached, counting error CQEs, then publishes the updated
+ * consumer index to the CQ doorbell record.
+ */
+static inline void __rte_unused
+mlx5_vdpa_cq_poll(struct mlx5_vdpa_priv *priv __rte_unused,
+		  struct mlx5_vdpa_cq *cq)
+{
+	const unsigned int cqe_mask = (1 << cq->log_desc_n) - 1;
+	int ret;
+
+	do {
+		volatile struct mlx5_cqe *cqe = cq->cqes + (cq->cq_ci &
+							    cqe_mask);
+
+		ret = check_cqe(cqe, cqe_mask + 1, cq->cq_ci);
+		switch (ret) {
+		case MLX5_CQE_STATUS_ERR:
+			/* Account the error, then consume it like a normal CQE. */
+			cq->errors++;
+			/*fall-through*/
+		case MLX5_CQE_STATUS_SW_OWN:
+			cq->cq_ci++;
+			break;
+		case MLX5_CQE_STATUS_HW_OWN:
+		default:
+			break;
+		}
+	} while (ret != MLX5_CQE_STATUS_HW_OWN);
+	/* Order CQE reads before updating the doorbell record. */
+	rte_io_wmb();
+	cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
+}
+
+/*
+ * CQE event interrupt handler registered on the DevX event channel FD.
+ *
+ * Drains every pending event from the non-blocking channel; each event
+ * cookie carries the CQ pointer armed for it. For each CQ: poll ready
+ * CQEs and re-arm, under the CQ spinlock.
+ */
+static void
+mlx5_vdpa_interrupt_handler(void *cb_arg)
+{
+#ifndef HAVE_IBV_DEVX_EVENT
+	/* DevX events not supported by this rdma-core — nothing to do. */
+	(void)cb_arg;
+	return;
+#else
+	struct mlx5_vdpa_priv *priv = cb_arg;
+	union {
+		struct mlx5dv_devx_async_event_hdr event_resp;
+		uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 128];
+	} out;
+
+	/* Loop until the non-blocking read returns no complete event. */
+	while (mlx5_glue->devx_get_event(priv->eventc, &out.event_resp,
+					 sizeof(out.buf)) >=
+				       (ssize_t)sizeof(out.event_resp.cookie)) {
+		struct mlx5_vdpa_cq *cq = (struct mlx5_vdpa_cq *)
+					       (uintptr_t)out.event_resp.cookie;
+		rte_spinlock_lock(&cq->sl);
+		mlx5_vdpa_cq_poll(priv, cq);
+		mlx5_vdpa_cq_arm(priv, cq);
+		rte_spinlock_unlock(&cq->sl);
+		DRV_LOG(DEBUG, "CQ %p event: new cq_ci = %u.", cq, cq->cq_ci);
+	}
+#endif /* HAVE_IBV_DEVX_EVENT */
+}
+
+/*
+ * Set the DevX event channel FD non-blocking and register the CQE
+ * interrupt handler on it.
+ *
+ * Returns 0 on success, a negative errno value otherwise and rte_errno
+ * is set.
+ */
+int
+mlx5_vdpa_cqe_event_setup(struct mlx5_vdpa_priv *priv)
+{
+	int flags = fcntl(priv->eventc->fd, F_GETFL);
+	int ret;
+
+	/* F_GETFL returns -1 on failure; OR-ing -1 would set all flags. */
+	if (flags == -1) {
+		DRV_LOG(ERR, "Failed to get event channel FD flags.");
+		rte_errno = errno;
+		return -rte_errno;
+	}
+	ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to change event channel FD.");
+		rte_errno = errno;
+		return -rte_errno;
+	}
+	priv->intr_handle.fd = priv->eventc->fd;
+	priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	if (rte_intr_callback_register(&priv->intr_handle,
+				       mlx5_vdpa_interrupt_handler, priv)) {
+		/* Clear the FD so event_unset() sees nothing registered. */
+		priv->intr_handle.fd = 0;
+		DRV_LOG(ERR, "Failed to register CQE interrupt %d.", rte_errno);
+		return -rte_errno;
+	}
+	return 0;
+}
+
+/*
+ * Unregister the CQE interrupt handler and reset the interrupt handle.
+ *
+ * rte_intr_callback_unregister() returns -EAGAIN while the handler is
+ * still executing, so retry a bounded number of times with a delay
+ * rather than spinning forever.
+ */
+void
+mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv)
+{
+	int retries = MLX5_VDPA_INTR_RETRIES;
+	int ret = -EAGAIN;
+
+	/* A zero FD means setup never registered the handler. */
+	if (priv->intr_handle.fd) {
+		while (retries-- && ret == -EAGAIN) {
+			ret = rte_intr_callback_unregister(&priv->intr_handle,
+					    mlx5_vdpa_interrupt_handler,
+					    priv);
+			if (ret == -EAGAIN) {
+				DRV_LOG(DEBUG, "Try again to unregister fd %d "
+					"of CQ interrupt, retries = %d.",
+					priv->intr_handle.fd, retries);
+				usleep(MLX5_VDPA_INTR_RETRIES_USEC);
+			}
+		}
+		memset(&priv->intr_handle, 0, sizeof(priv->intr_handle));
+	}
+}