[dpdk-dev] [PATCH v2 06/13] mlx5: add MTU configuration support
Adrien Mazarguil
adrien.mazarguil at 6wind.com
Fri Oct 30 19:52:35 CET 2015
Depending on the MTU and whether jumbo frames are enabled, RX queues may
switch between scatter/gather (SG) and non-SG modes for better performance.
Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
drivers/net/mlx5/mlx5.c | 1 +
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_ethdev.c | 102 +++++++++++++++++++++++
drivers/net/mlx5/mlx5_rxq.c | 178 +++++++++++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_rxtx.h | 1 +
5 files changed, 283 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index aafa70b..ddd74d0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -140,6 +140,7 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.tx_queue_release = mlx5_tx_queue_release,
.mac_addr_remove = mlx5_mac_addr_remove,
.mac_addr_add = mlx5_mac_addr_add,
+ .mtu_set = mlx5_dev_set_mtu,
};
static struct {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3f47a15..0e2457a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -162,6 +162,7 @@ int priv_get_mtu(struct priv *, uint16_t *);
int priv_set_flags(struct priv *, unsigned int, unsigned int);
int mlx5_dev_configure(struct rte_eth_dev *);
void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *);
+int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t);
int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
struct rte_pci_addr *);
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 6b13cec..0afc1bb 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -347,6 +347,23 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
}
/**
+ * Set device MTU by handing it to priv_set_sysfs_ulong() for the mtu entry.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mtu
+ * MTU value to set.
+ *
+ * @return
+ * Value of priv_set_sysfs_ulong(): 0 on success, -1 on failure and errno is set.
+ */
+static int
+priv_set_mtu(struct priv *priv, uint16_t mtu)
+{
+ return priv_set_sysfs_ulong(priv, "mtu", mtu);
+}
+
+/**
* Set device flags.
*
* @param priv
@@ -518,6 +535,91 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
}
/**
+ * DPDK callback to change the MTU.
+ *
+ * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
+ * received). Use this as a hint to enable/disable scattered packets support
+ * on each RX queue and improve performance when it is not needed.
+ * Since a queue reconfiguration failure is not rolled back here,
+ * reconfiguring queues on the fly is not recommended.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mtu
+ * New MTU.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
+{
+ struct priv *priv = dev->data->dev_private;
+ int ret = 0;
+ unsigned int i;
+ uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
+ mlx5_rx_burst;
+
+ priv_lock(priv);
+ /* Set kernel interface MTU first; nothing else is touched on failure. */
+ if (priv_set_mtu(priv, mtu)) {
+ ret = errno; /* Positive errno value, negated on return. */
+ WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
+ strerror(ret));
+ goto out;
+ } else
+ DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
+ priv->mtu = mtu;
+ /* Temporarily replace RX handler with a fake one, assuming it has not
+ * been copied elsewhere. */
+ dev->rx_pkt_burst = removed_rx_burst;
+ /* Make sure everyone has left mlx5_rx_burst() and uses
+ * removed_rx_burst() instead (write barrier, then a 1000 us sleep). */
+ rte_wmb();
+ usleep(1000);
+ /* Reconfigure each RX queue, skipping unconfigured (NULL) entries. */
+ for (i = 0; (i != priv->rxqs_n); ++i) {
+ struct rxq *rxq = (*priv->rxqs)[i];
+ unsigned int max_frame_len;
+ int sp;
+
+ if (rxq == NULL)
+ continue;
+ /* Calculate new maximum frame length according to MTU and
+ * toggle scattered support (sp) if necessary. */
+ max_frame_len = (priv->mtu + ETHER_HDR_LEN +
+ (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
+ sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
+ /* Provide new values to rxq_rehash() through the device configuration. */
+ dev->data->dev_conf.rxmode.jumbo_frame = sp;
+ dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
+ ret = rxq_rehash(dev, rxq);
+ if (ret) {
+ /* Force SP RX if that queue requires it and abort. */
+ if (rxq->sp)
+ rx_func = mlx5_rx_burst_sp;
+ break;
+ }
+ /* Reenable non-RSS queue attributes. No need to check
+ * for errors at this stage. */
+ if (!priv->rss) {
+ if (priv->started)
+ rxq_mac_addrs_add(rxq);
+ }
+ /* Scattered burst function takes priority. */
+ if (rxq->sp)
+ rx_func = mlx5_rx_burst_sp;
+ }
+ /* Burst functions can now be called again. */
+ rte_wmb();
+ dev->rx_pkt_burst = rx_func;
+out:
+ priv_unlock(priv);
+ assert(ret >= 0); /* ret is 0 or a positive errno value here. */
+ return -ret;
+}
+
+/**
* Get PCI information from struct ibv_device.
*
* @param device
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1eddfc7..71d4470 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -526,6 +526,184 @@ rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
#endif /* RSS_SUPPORT */
/**
+ * Reconfigure a RX queue with new parameters.
+ *
+ * rxq_rehash() reuses the mbufs already attached to the queue and does not
+ * allocate mbufs, which, if not done from the right thread (such as a control
+ * thread), may corrupt the pool. Past flow removal, a failure may leave the queue unusable.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param rxq
+ * RX queue pointer.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
+{
+ struct priv *priv = rxq->priv;
+ struct rxq tmpl = *rxq;
+ unsigned int mbuf_n;
+ unsigned int desc_n;
+ struct rte_mbuf **pool;
+ unsigned int i, k;
+ struct ibv_exp_qp_attr mod;
+ struct ibv_recv_wr *bad_wr;
+ int err;
+ int parent = (rxq == &priv->rxq_parent);
+
+ if (parent) {
+ ERROR("%p: cannot rehash parent queue %p",
+ (void *)dev, (void *)rxq);
+ return EINVAL;
+ }
+ DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
+ /* Number of descriptors and mbufs currently allocated. */
+ desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
+ mbuf_n = desc_n;
+ /* Enable scattered packets support for this queue if necessary. */
+ if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
+ (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+ (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
+ tmpl.sp = 1;
+ desc_n /= MLX5_PMD_SGE_WR_N;
+ } else
+ tmpl.sp = 0;
+ DEBUG("%p: %s scattered packets support (%u WRs)",
+ (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
+ /* If scatter mode is the same as before, nothing to do. */
+ if (tmpl.sp == rxq->sp) {
+ DEBUG("%p: nothing to do", (void *)dev);
+ return 0;
+ }
+ /* Remove attached flows if RSS is disabled (no parent queue). */
+ if (!priv->rss) {
+ rxq_mac_addrs_del(&tmpl);
+ /* Update original queue in case of failure. */
+ memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
+ }
+ /* From now on, any failure will render the queue unusable.
+ * Reinitialize QP. */
+ mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
+ err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+ if (err) {
+ ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
+ assert(err > 0);
+ return err;
+ }
+ err = ibv_resize_cq(tmpl.cq, desc_n);
+ if (err) {
+ ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
+ assert(err > 0);
+ return err;
+ }
+ mod = (struct ibv_exp_qp_attr){
+ /* Move the QP to this state. */
+ .qp_state = IBV_QPS_INIT,
+ /* Primary port number. */
+ .port_num = priv->port
+ };
+ err = ibv_exp_modify_qp(tmpl.qp, &mod,
+ (IBV_EXP_QP_STATE |
+#ifdef RSS_SUPPORT
+ (parent ? IBV_EXP_QP_GROUP_RSS : 0) | /* parent is 0 here: the parent queue was rejected above. */
+#endif /* RSS_SUPPORT */
+ IBV_EXP_QP_PORT));
+ if (err) {
+ ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
+ (void *)dev, strerror(err));
+ assert(err > 0);
+ return err;
+ }; /* NOTE(review): stray semicolon, an empty statement. */
+ /* Reconfigure flows. Do not care for errors. */
+ if (!priv->rss) {
+ if (priv->started)
+ rxq_mac_addrs_add(&tmpl);
+ /* Update original queue in case of failure. */
+ memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
+ }
+ /* Allocate temporary array for the mbuf pointers taken from the queue. */
+ pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
+ if (pool == NULL) {
+ ERROR("%p: cannot allocate memory", (void *)dev);
+ return ENOBUFS; /* NOTE(review): QP was already moved to INIT above. */
+ }
+ /* Snatch mbufs from original queue. */
+ k = 0;
+ if (rxq->sp) {
+ struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
+
+ for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+ struct rxq_elt_sp *elt = &(*elts)[i];
+ unsigned int j;
+
+ for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
+ assert(elt->bufs[j] != NULL);
+ pool[k++] = elt->bufs[j];
+ }
+ }
+ } else {
+ struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;
+
+ for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+ struct rxq_elt *elt = &(*elts)[i];
+ struct rte_mbuf *buf = (void *) /* mbuf address = SGE address minus the offset kept in wr_id. */
+ ((uintptr_t)elt->sge.addr -
+ WR_ID(elt->wr.wr_id).offset);
+
+ assert(WR_ID(elt->wr.wr_id).id == i);
+ pool[k++] = buf;
+ }
+ }
+ assert(k == mbuf_n);
+ tmpl.elts_n = 0;
+ tmpl.elts.sp = NULL; /* Detach tmpl from the original array; rxq still points to it. */
+ assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp); /* sp and no_sp share storage. */
+ err = ((tmpl.sp) ?
+ rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
+ rxq_alloc_elts(&tmpl, desc_n, pool));
+ if (err) {
+ ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
+ rte_free(pool);
+ assert(err > 0);
+ return err;
+ }
+ assert(tmpl.elts_n == desc_n);
+ assert(tmpl.elts.sp != NULL);
+ rte_free(pool);
+ /* Clean up original data (the old element array only). */
+ rxq->elts_n = 0;
+ rte_free(rxq->elts.sp);
+ rxq->elts.sp = NULL;
+ /* Post WRs. */
+ err = ibv_post_recv(tmpl.qp,
+ (tmpl.sp ?
+ &(*tmpl.elts.sp)[0].wr :
+ &(*tmpl.elts.no_sp)[0].wr),
+ &bad_wr);
+ if (err) {
+ ERROR("%p: ibv_post_recv() failed for WR %p: %s",
+ (void *)dev,
+ (void *)bad_wr,
+ strerror(err));
+ goto skip_rtr;
+ }
+ mod = (struct ibv_exp_qp_attr){
+ .qp_state = IBV_QPS_RTR
+ };
+ err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+ if (err)
+ ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
+ (void *)dev, strerror(err));
+skip_rtr:
+ *rxq = tmpl; /* Commit the new state even if posting WRs or the RTR transition failed. */
+ assert(err >= 0);
+ return err;
+}
+
+/**
* Configure a RX queue.
*
* @param dev
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c7f634e..b6f2128 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -144,6 +144,7 @@ struct txq {
/* mlx5_rxq.c */
void rxq_cleanup(struct rxq *);
+int rxq_rehash(struct rte_eth_dev *, struct rxq *);
int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,
const struct rte_eth_rxconf *, struct rte_mempool *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
--
2.1.0
More information about the dev
mailing list