@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015-2016 6WIND S.A.
+ * Copyright 2015-2016 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -122,6 +122,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
(void *)dev,
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
/* In case mlx5_dev_stop() has not been called. */
+ mlx5_timesync_disable(dev);
priv_dev_interrupt_handler_uninstall(priv, dev);
priv_special_flow_disable_all(priv);
priv_mac_addrs_disable(priv);
@@ -219,6 +220,8 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.rss_hash_update = mlx5_rss_hash_update,
.rss_hash_conf_get = mlx5_rss_hash_conf_get,
.filter_ctrl = mlx5_dev_filter_ctrl,
+ .timesync_enable = mlx5_timesync_enable,
+ .timesync_disable = mlx5_timesync_disable,
};
static struct {
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015-2016 6WIND S.A.
+ * Copyright 2015-2016 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -54,6 +54,7 @@
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
+#include <rte_alarm.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_spinlock.h>
@@ -64,6 +65,7 @@
#endif
#include "mlx5_utils.h"
+#include "mlx5_time.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
@@ -113,6 +115,7 @@ struct priv {
unsigned int mps:1; /* Whether multi-packet send is supported. */
unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */
unsigned int pending_alarm:1; /* An alarm is pending. */
+ unsigned int timesync_en:1; /* Timesync (timestamping) enabled */
unsigned int txq_inline; /* Maximum packet size for inlining. */
unsigned int txqs_inline; /* Queue number threshold for inlining. */
/* RX/TX queues. */
@@ -120,6 +123,7 @@ struct priv {
unsigned int txqs_n; /* TX queues array size. */
struct rxq *(*rxqs)[]; /* RX queues. */
struct txq *(*txqs)[]; /* TX queues. */
+ struct mlx5_timesync timesync; /* Time synchronization object. */
/* Indirection tables referencing all RX WQs. */
struct ibv_exp_rwq_ind_table *(*ind_tables)[];
unsigned int ind_tables_n; /* Number of indirection tables. */
@@ -203,6 +207,8 @@ int mlx5_set_link_up(struct rte_eth_dev *dev);
struct priv *mlx5_secondary_data_setup(struct priv *priv);
void priv_select_tx_function(struct priv *);
void priv_select_rx_function(struct priv *);
+int mlx5_timesync_enable(struct rte_eth_dev *dev);
+int mlx5_timesync_disable(struct rte_eth_dev *dev);
/* mlx5_mac.c */
@@ -79,4 +79,8 @@
/* Alarm timeout. */
#define MLX5_ALARM_TIMEOUT_US 100000
+/* Clock deviation fix alarm timeout (us) and threshold (HW clock ticks). */
+#define MLX5_ALARM_CLOCK_DEVIATION_US 5000000
+#define MLX5_CLOCK_DEVIATION_THRESHOLD 10
+
#endif /* RTE_PMD_MLX5_DEFS_H_ */
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015-2016 6WIND S.A.
+ * Copyright 2015-2016 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -59,6 +59,7 @@
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
+#include <rte_time.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
@@ -1412,3 +1413,220 @@ priv_select_rx_function(struct priv *priv)
{
priv->dev->rx_pkt_burst = mlx5_rx_burst;
}
+
+/**
+ * Sample the HCA HW clock and the matching system time. The best of 10
+ * attempts (shortest clock query) is kept to minimize the shift between them.
+ *
+ * @param ibv_ctx
+ *   Pointer to IB verbs context.
+ * @param[out] st
+ *   Pointer to store system time.
+ * @param[out] st_ns
+ *   Pointer to store system time represented in ns.
+ * @param[out] hw_clock
+ *   Pointer to store HCA HW clock.
+ *
+ * @return
+ *   0 on success, -1 on failure (outputs are then left untouched).
+ */
+static int
+mlx5_sync_clocks(struct ibv_context *ibv_ctx, struct timespec *st,
+		 volatile uint64_t *st_ns, volatile uint64_t *hw_clock)
+{
+	struct timespec before, after, half, best_time = TIMESPEC_INITIALIZER;
+	struct ibv_exp_values query_val = {0};
+	int64_t interval, best_interval = -1; /* -1: no valid sample yet. */
+	uint64_t best_hw_clock = 0;
+
+	query_val.comp_mask = IBV_EXP_VALUES_HW_CLOCK;
+	for (int i = 0; i < 10; ++i) {
+		clock_gettime(CLOCK_REALTIME, &before);
+		if (ibv_exp_query_values(ibv_ctx, IBV_EXP_VALUES_HW_CLOCK,
+					 &query_val) || !query_val.hwclock)
+			return -1;
+		clock_gettime(CLOCK_REALTIME, &after);
+		interval = (after.tv_sec - before.tv_sec) * NSEC_PER_SEC +
+			   (after.tv_nsec - before.tv_nsec);
+		/* Negative: system time stepped back during the query. */
+		if (interval < 0 ||
+		    (best_interval >= 0 && interval >= best_interval))
+			continue;
+		best_interval = interval;
+		best_hw_clock = query_val.hwclock;
+		half.tv_sec = (interval / 2) / NSEC_PER_SEC;
+		half.tv_nsec = (interval / 2) % NSEC_PER_SEC;
+		rte_timespec_add(&before, &half, &best_time);
+	}
+	if (best_interval < 0)
+		return -1;
+	*st = best_time;
+	*st_ns = st->tv_sec * NSEC_PER_SEC + st->tv_nsec;
+	*hw_clock = best_hw_clock;
+	return 0;
+}
+
+/**
+ * Periodic function run by rte_eal_alarm to synchronize with system time
+ * and measure the HCA HW clock deviation. The updated synchronization point
+ * is copied to every RX queue, where packet timestamps are computed from it.
+ * It re-arms itself unless the clock frequency is 0 or sampling fails.
+ *
+ * @param arg
+ *   Void pointer to struct priv.
+ */
+static void
+mlx5_fix_hw_clock_deviation_handler(void *arg)
+{
+	struct priv *pv = arg;
+	struct timespec current_time, diff_systime;
+	uint64_t diff_hw_clock, hw_clock, estimated_hw_clock;
+	uint64_t systime_ns, diff_systime_ns, freq;
+	int64_t clock_deviation_hw;
+	volatile struct mlx5_timestamp_sync *ts = &pv->timesync.sync_timestamp;
+
+	if (!ts->port_clock_frequency ||
+	    mlx5_sync_clocks(pv->ctx, &current_time, &systime_ns, &hw_clock))
+		return;
+	/* Sync state is shared: compute elapsed time and clocks under lock. */
+	priv_lock(pv);
+	rte_timespec_sub(&current_time, &pv->timesync.sync_systime,
+			 &diff_systime);
+	diff_systime_ns = rte_timespec_to_ns(&diff_systime);
+	diff_hw_clock = hw_clock - ts->sync_hw_clock;
+	estimated_hw_clock = (diff_systime.tv_sec * ts->port_clock_frequency) +
+			     (diff_systime.tv_nsec * ts->port_clock_frequency /
+			      NSEC_PER_SEC);
+	clock_deviation_hw = estimated_hw_clock - diff_hw_clock;
+	/* Measured frequency, 0 (never applied) if no time has elapsed. */
+	freq = diff_systime_ns ?
+	       (diff_hw_clock * NSEC_PER_SEC) / diff_systime_ns : 0;
+	/* abs() would truncate the 64-bit deviation to int. */
+	if (freq && (clock_deviation_hw >= MLX5_CLOCK_DEVIATION_THRESHOLD ||
+		     clock_deviation_hw <= -MLX5_CLOCK_DEVIATION_THRESHOLD)) {
+		ts->port_clock_frequency = freq;
+		ts->mskd_duration = ((uint64_t)NSEC_PER_SEC << 30) / freq;
+	}
+	ts->sync_hw_clock = hw_clock;
+	ts->sync_time_ns = systime_ns;
+	pv->timesync.sync_systime = current_time;
+	DEBUG("%ld.%09ld since last fix, time_ns: %lu estimate_hw_clock = %ld,"
+	      "diff_hw_clock = %ld, deviation = %ld, freq = %ld durat: %lu",
+	      diff_systime.tv_sec, diff_systime.tv_nsec, systime_ns,
+	      estimated_hw_clock, diff_hw_clock, clock_deviation_hw,
+	      ts->port_clock_frequency, ts->mskd_duration);
+	/* update all queues */
+	for (uint32_t i = 0; i != pv->rxqs_n; i++) {
+		struct rxq *rxq = (*pv->rxqs)[i];
+
+		if (rxq == NULL)
+			continue;
+		rxq->timestamps_enabled = pv->timesync_en;
+		rxq->timesync = *ts;
+	}
+	priv_unlock(pv);
+	rte_eal_alarm_set(MLX5_ALARM_CLOCK_DEVIATION_US,
+			  mlx5_fix_hw_clock_deviation_handler, pv);
+}
+
+/**
+ * Return HCA port clock frequency in Hz, or 0 if it cannot be queried.
+ *
+ * @param pv
+ *   Pointer to private structure.
+ */
+static uint64_t
+mlx5_get_port_clock_frequency(struct priv *pv)
+{
+	struct ibv_exp_device_attr attr = {0}; /* Unset fields read as 0. */
+	attr.comp_mask = IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK;
+	if (ibv_exp_query_device(pv->ctx, &attr)) {
+		ERROR("ibv_exp_query_device() failed");
+		return 0;
+	}
+	return attr.hca_core_clock * 1000; /* orig in KHz */
+}
+
+/**
+ * DPDK callback to enable timestamping. rte_mbuf.timestamp of received
+ * packets then holds their HW timestamp converted to system time, in ns.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_timesync_enable(struct rte_eth_dev *dev)
+{
+	struct priv *pv = dev->data->dev_private;
+	struct mlx5_timesync *tso = &pv->timesync;
+	volatile struct mlx5_timestamp_sync *sts = &tso->sync_timestamp;
+	int ret = -ENOTSUP;
+
+	/* Drop the alarm of a previous enable, a single one is armed below. */
+	rte_eal_alarm_cancel(mlx5_fix_hw_clock_deviation_handler, pv);
+	priv_lock(pv);
+	pv->timesync_en = 0;
+	sts->port_clock_frequency = mlx5_get_port_clock_frequency(pv);
+	if (!sts->port_clock_frequency) {
+		INFO("Timesync disabled: %d as port clock frequency is 0",
+		     pv->timesync_en);
+		goto out;
+	}
+	INFO("port %u Clock frequency: %lu Hz", pv->port,
+	     sts->port_clock_frequency);
+	if (mlx5_sync_clocks(pv->ctx, &tso->sync_systime, &sts->sync_time_ns,
+			     &sts->sync_hw_clock)) {
+		INFO("Timesync disabled: %d", pv->timesync_en);
+		goto out;
+	}
+	sts->mskd_duration = (NSEC_PER_SEC << 30) / sts->port_clock_frequency;
+	pv->timesync_en = 1;
+	ret = 0;
+	DEBUG("%p: Timesync enabled, masked duration: %lu", (void *)dev,
+	      sts->mskd_duration);
+	rte_eal_alarm_set(1000, mlx5_fix_hw_clock_deviation_handler, pv);
+	DEBUG("%p: sync_systime: %ld.%09ld, time_ns: %lu sync_hw_clock: %lu",
+	      (void *)dev, tso->sync_systime.tv_sec,
+	      tso->sync_systime.tv_nsec, sts->sync_time_ns, sts->sync_hw_clock);
+out:
+	/* Update all queues, on failure too so that they stop timestamping. */
+	for (uint32_t i = 0; i != pv->rxqs_n; i++) {
+		struct rxq *rxq = (*pv->rxqs)[i];
+
+		if (rxq == NULL)
+			continue;
+		rxq->timestamps_enabled = pv->timesync_en;
+		rxq->timesync = *sts;
+	}
+	priv_unlock(pv);
+	return ret;
+}
+
+/**
+ * DPDK callback to disable timestamping. The periodic clock deviation fix is
+ * cancelled and rte_mbuf.timestamp becomes undefined.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Always 0.
+ */
+int
+mlx5_timesync_disable(struct rte_eth_dev *dev)
+{
+	struct priv *priv = dev->data->dev_private;
+	uint32_t idx;
+
+	priv->timesync_en = 0;
+	rte_eal_alarm_cancel(mlx5_fix_hw_clock_deviation_handler, priv);
+	/* Mirror the cleared flag into every allocated RX queue. */
+	for (idx = 0; idx != priv->rxqs_n; ++idx)
+		if ((*priv->rxqs)[idx] != NULL)
+			(*priv->rxqs)[idx]->timestamps_enabled =
+				priv->timesync_en;
+	return 0;
+}
@@ -1258,6 +1258,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
rte_free(rxq_ctrl);
else {
rxq_ctrl->rxq.stats.idx = idx;
+ rxq_ctrl->rxq.timestamps_enabled = priv->timesync_en;
+ rxq_ctrl->rxq.timesync = priv->timesync.sync_timestamp;
DEBUG("%p: adding RX queue %p to list",
(void *)dev, (void *)rxq_ctrl);
(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015-2016 6WIND S.A.
+ * Copyright 2015-2016 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -59,6 +59,7 @@
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_vect.h>
+#include <rte_time.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
@@ -1385,6 +1386,20 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
len -= ETHER_CRC_LEN;
}
PKT_LEN(pkt) = len;
+ /* Calculate synchronized timestamp in ns */
+ if (unlikely(rxq->timestamps_enabled)) {
+ volatile struct mlx5_timestamp_sync *tso =
+ &rxq->timesync;
+ uint64_t clock_diff;
+ rte_prefetch0(tso);
+ clock_diff = ntohll(cqe->timestamp) -
+ tso->sync_hw_clock;
+ clock_diff = (clock_diff * tso->mskd_duration)
+ >> 30;
+ pkt->timestamp = tso->sync_time_ns +
+ clock_diff;
+ pkt->ol_flags |= PKT_RX_IEEE1588_TMST;
+ }
}
DATA_LEN(rep) = DATA_LEN(seg);
PKT_LEN(rep) = PKT_LEN(seg);
@@ -1,8 +1,8 @@
/*-
* BSD LICENSE
*
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
+ * Copyright 2015-2016 6WIND S.A.
+ * Copyright 2015-2016 Mellanox.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -60,6 +60,7 @@
#endif
#include "mlx5_utils.h"
+#include "mlx5_time.h"
#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
@@ -109,6 +110,7 @@ struct rxq {
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
+ unsigned int timestamps_enabled:1; /* timestamping enabled */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
@@ -125,6 +127,7 @@ struct rxq {
struct rte_mbuf *(*elts)[];
struct rte_mempool *mp;
struct mlx5_rxq_stats stats;
+ volatile struct mlx5_timestamp_sync timesync; /* per queue copy */
} __rte_cache_aligned;
/* RX queue control descriptor. */
new file mode 100644
@@ -0,0 +1,53 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_TIME_H_
+#define RTE_PMD_MLX5_TIME_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+#include <time.h>
+
+struct mlx5_timestamp_sync { /* HW clock to system time sync point. */
+ uint64_t sync_hw_clock; /* HCA HW clock sampled at the last sync. */
+ uint64_t sync_time_ns; /* System time of the last sync, in ns. */
+ uint64_t mskd_duration; /* ns per HW clock tick, scaled by 2^30. */
+ uint64_t port_clock_frequency; /* HCA clock frequency, in Hz. */
+};
+
+struct mlx5_timesync { /* Device sync state; RX queues copy sync_timestamp. */
+ volatile struct mlx5_timestamp_sync sync_timestamp; /* Sync point. */
+ struct timespec sync_systime; /* System time of the last sync. */
+};
+
+#endif /* RTE_PMD_MLX5_TIME_H_ */
@@ -116,6 +116,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
return;
}
DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
+ mlx5_timesync_disable(dev);
priv_special_flow_disable_all(priv);
priv_mac_addrs_disable(priv);
priv_destroy_hash_rxqs(priv);
@@ -37,6 +37,8 @@
#include <stdint.h>
#include <time.h>
+#define TIMESPEC_INITIALIZER {0, 0}
+
#define NSEC_PER_SEC 1000000000L
/**
@@ -127,4 +129,47 @@ rte_ns_to_timespec(uint64_t nsec)
return ts;
}
+/**
+ * Add two timespec times. A single carry is propagated from tv_nsec to
+ * tv_sec, so the result is normalized only if both inputs are.
+ *
+ * @param a
+ *   Pointer to the first time.
+ * @param b
+ *   Pointer to the second time.
+ * @param res
+ *   Pointer to the result, may alias a or b.
+ */
+static inline void rte_timespec_add(const struct timespec *a,
+		const struct timespec *b, struct timespec *res)
+{
+	res->tv_sec = a->tv_sec + b->tv_sec;
+	res->tv_nsec = a->tv_nsec + b->tv_nsec;
+	if (res->tv_nsec >= NSEC_PER_SEC) {
+		++res->tv_sec;
+		res->tv_nsec -= NSEC_PER_SEC;
+	}
+}
+
+/**
+ * Subtract the second timespec time from the first (single borrow only).
+ *
+ * @param a
+ *   Pointer to the first time (minuend).
+ * @param b
+ *   Pointer to the second time (subtrahend).
+ * @param res
+ *   Pointer to the result, may alias a or b.
+ */
+static inline void rte_timespec_sub(const struct timespec *a,
+		const struct timespec *b, struct timespec *res)
+{
+	res->tv_sec = a->tv_sec - b->tv_sec;
+	res->tv_nsec = a->tv_nsec - b->tv_nsec;
+	if (res->tv_nsec < 0) {
+		--res->tv_sec;
+		res->tv_nsec += NSEC_PER_SEC;
+	}
+}
+
#endif /* _RTE_TIME_H_ */