@@ -284,6 +284,8 @@
close(priv->nl_socket_rdma);
if (priv->mnl_socket)
mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+ if (priv->mnl_rcvbuf)
+ mlx5_nl_flow_rcv_buf_destroy(priv->mnl_rcvbuf);
ret = mlx5_hrxq_ibv_verify(dev);
if (ret)
DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1131,7 +1133,8 @@
if (vf && config.vf_nl_en)
mlx5_nl_mac_addr_sync(eth_dev);
priv->mnl_socket = mlx5_nl_flow_socket_create();
- if (!priv->mnl_socket) {
+ priv->mnl_rcvbuf = mlx5_nl_flow_rcv_buf_create();
+ if (!priv->mnl_socket || !priv->mnl_rcvbuf) {
err = -rte_errno;
DRV_LOG(WARNING,
"flow rules relying on switch offloads will not be"
@@ -1155,7 +1158,9 @@
" not be supported: %s: %s",
error.message, strerror(rte_errno));
mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+ mlx5_nl_flow_rcv_buf_destroy(priv->mnl_rcvbuf);
priv->mnl_socket = NULL;
+ priv->mnl_rcvbuf = NULL;
}
}
TAILQ_INIT(&priv->flows);
@@ -1212,6 +1217,8 @@
close(priv->nl_socket_rdma);
if (priv->mnl_socket)
mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+ if (priv->mnl_rcvbuf)
+ mlx5_nl_flow_rcv_buf_destroy(priv->mnl_rcvbuf);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
rte_free(priv);
@@ -163,6 +163,7 @@ struct mlx5_nl_flow_ptoi {
};
struct mnl_socket;
+struct mlx5_nl_rbuf;
struct priv {
LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -229,6 +230,8 @@ struct priv {
/* UAR same-page access control required in 32bit implementations. */
#endif
struct mnl_socket *mnl_socket; /* Libmnl socket. */
+ struct mlx5_nl_rbuf *mnl_rcvbuf;
+ /* Buffer for receiving libmnl messages. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -414,5 +417,12 @@ int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
struct rte_flow_error *error);
struct mnl_socket *mlx5_nl_flow_socket_create(void);
void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
+struct mlx5_nl_rbuf *mlx5_nl_flow_rcv_buf_create(void);
+void mlx5_nl_flow_rcv_buf_destroy(struct mlx5_nl_rbuf *rb);
+int mlx5_nl_flow_query_count(struct mnl_socket *nl,
+ void *fbuf,
+ struct mlx5_nl_rbuf *rbuf,
+ struct rte_flow_query_count *qc,
+ struct rte_flow_error *error);
#endif /* RTE_PMD_MLX5_H_ */
@@ -3370,13 +3370,20 @@ struct rte_flow *
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
- void *data __rte_unused,
+mlx5_flow_query_count(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ void *data,
struct rte_flow_error *error)
{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow_query_count *qc = data;
+
+ if (flow->nl_flow && priv->mnl_socket && priv->mnl_rcvbuf)
+ return mlx5_nl_flow_query_count(priv->mnl_socket,
+ flow->nl_flow,
+ priv->mnl_rcvbuf, qc, error);
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
- struct rte_flow_query_count *qc = data;
uint64_t counters[2] = {0, 0};
struct ibv_query_counter_set_attr query_cs_attr = {
.cs = flow->counter->cs,
@@ -3423,7 +3430,7 @@ struct rte_flow *
* @see rte_flow_ops
*/
int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
struct rte_flow *flow,
const struct rte_flow_action *actions,
void *data,
@@ -3436,7 +3443,7 @@ struct rte_flow *
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
- ret = mlx5_flow_query_count(flow, data, error);
+ ret = mlx5_flow_query_count(dev, flow, data, error);
break;
default:
return rte_flow_error_set(error, ENOTSUP,
@@ -11,6 +11,7 @@
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
+#include <linux/gen_stats.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
@@ -25,6 +26,7 @@
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>
+#include <rte_malloc.h>
#include "mlx5.h"
#include "mlx5_autoconf.h"
@@ -147,6 +149,16 @@ struct tc_vlan {
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
+/**
+ * Structure for holding netlink message buffer of MNL_SOCKET_BUFFER_SIZE.
+ * Using this (8KB) buffer size ensures that netlink messages will never be
+ * truncated.
+ */
+struct mlx5_nl_rbuf {
+ uint8_t *buf; /* Receive buffer, bsize bytes long. */
+ uint16_t bsize; /* Size of buf; MNL_SOCKET_BUFFER_SIZE (<= 8KB) fits. */
+};
+
/** Parser state definitions for mlx5_nl_flow_trans[]. */
enum mlx5_nl_flow_trans {
INVALID,
@@ -169,6 +181,7 @@ enum mlx5_nl_flow_trans {
ACTION_OF_PUSH_VLAN,
ACTION_OF_SET_VLAN_VID,
ACTION_OF_SET_VLAN_PCP,
+ ACTION_COUNT,
END,
};
@@ -178,7 +191,7 @@ enum mlx5_nl_flow_trans {
ITEM_VOID, ITEM_PORT_ID, ACTIONS
#define ACTIONS_COMMON \
ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
- ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
+ ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP, ACTION_COUNT
#define ACTIONS_FATE \
ACTION_PORT_ID, ACTION_DROP
@@ -204,6 +217,7 @@ enum mlx5_nl_flow_trans {
[ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
[ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
[ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
+ [ACTION_COUNT] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
[END] = NULL,
};
@@ -869,6 +883,11 @@ enum mlx5_nl_flow_trans {
goto trans;
++action;
break;
+ case ACTION_COUNT:
+ if (action->type != RTE_FLOW_ACTION_TYPE_COUNT)
+ goto trans;
+ ++action;
+ break;
case ACTION_PORT_ID:
if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
goto trans;
@@ -1042,11 +1061,11 @@ enum mlx5_nl_flow_trans {
* Unique 32-bit handle to use.
*/
void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
+mlx5_nl_flow_brand(void *buf, uint32_t handle __rte_unused)
{
struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
- tcm->tcm_handle = handle;
+ tcm->tcm_handle = handle & 0x7FFFFFFF; // MOTIH
}
/**
@@ -1141,6 +1160,256 @@ enum mlx5_nl_flow_trans {
}
/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * being retrieved.
+ *
+ * @param tb
+ * Attribute table to be filled.
+ * @param max
+ * Maximum valid index in the attribute table.
+ * @param rta
+ * The attributes section in the message to be parsed.
+ * @param len
+ * The length of the attributes section in the message.
+ *
+ * Attributes with a type above max are ignored; the first of each type wins.
+ */
+static void
+tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+ unsigned short type;
+
+ memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); /* clear table first */
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type;
+ if (type <= max && !tb[type]) /* first attribute of each type wins */
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+/**
+ * Extract action counters from flower action.
+ *
+ * @param rta
+ * flower action stats properties in the Netlink message received.
+ * @param[out] qc
+ * Count statistics retrieved from the message query.
+ * @return
+ * 0 on successful extraction of action counts, -1 otherwise.
+ */
+static int
+tc_flow_extract_stats_attr(struct rtattr *rta, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+ tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ if (tbs[TCA_STATS_BASIC]) {
+ struct gnet_stats_basic bs = {0};
+
+ memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
+ RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+ sizeof(bs))); /* payload may be shorter than bs */
+ qc->bytes = bs.bytes;
+ qc->hits = bs.packets;
+ qc->bytes_set = 1;
+ qc->hits_set = 1;
+ return 0;
+ }
+ return -1; /* no TCA_STATS_BASIC attribute present */
+}
+
+/**
+ * Parse flower single action retrieving the flow counters from it if present.
+ *
+ * @param arg
+ * flower action properties in the Netlink message received.
+ * @param[out] qc
+ * Count statistics retrieved from the message query.
+ * @return
+ * 0 on successful retrieval of action counts, -1 otherwise.
+ */
+static int
+tc_flow_parse_one_action(struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_ACT_MAX + 1];
+
+ if (arg == NULL)
+ return -1;
+ tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+ if (tb[TCA_ACT_KIND] == NULL) /* malformed action: kind is mandatory */
+ return -1;
+ if (tb[TCA_ACT_STATS])
+ return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
+ return -1; /* action carries no statistics */
+}
+
+/**
+ * Parse flower action section in the message, retrieving the flow counters
+ * from the first action that contains them.
+ * flow counters are stored in the actions defined by the flow and not in the
+ * flow itself, therefore we need to traverse the flower action in search for
+ * them.
+ *
+ * @param arg
+ * flower actions section in the Netlink message received.
+ * @param[out] qc
+ * Count statistics retrieved from the message query.
+ */
+static void
+tc_flow_parse_action(const struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+ int i;
+
+ if (arg == NULL)
+ return;
+ tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg), RTA_PAYLOAD(arg));
+ for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
+ if (tb[i])
+ if (tc_flow_parse_one_action(tb[i], qc) == 0) /* stop at first action with stats */
+ break;
+}
+
+/**
+ * Parse Netlink reply on flower type of filters, retrieving the flow counters
+ * from it.
+ *
+ * @param opt
+ * flower section in the Netlink message received.
+ * @param[out] qc
+ * Count statistics retrieved from the message query.
+ */
+static void
+tc_flower_parse_opt(struct rtattr *opt,
+ struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+ if (!opt)
+ return;
+ tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+ if (tb[TCA_FLOWER_ACT]) /* counters live inside the flower actions */
+ tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc);
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param nlh
+ * Message received from Netlink.
+ * @param[out] qc
+ * Count statistics retrieved from the message query.
+ *
+ * @return
+ * MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
+ struct rte_flow_query_count *qc)
+{
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len = nlh->nlmsg_len;
+ struct rtattr *tb[TCA_MAX + 1] = { };
+
+ if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+ nlh->nlmsg_type != RTM_GETTFILTER &&
+ nlh->nlmsg_type != RTM_DELTFILTER)
+ return MNL_CB_OK; /* not a TC filter message: nothing to parse */
+ len -= NLMSG_LENGTH(sizeof(*t));
+ if (len < 0)
+ return MNL_CB_ERROR; /* truncated message */
+ tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+ if (tb[TCA_KIND])
+ if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
+ tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
+ return MNL_CB_OK;
+}
+
+/**
+ * A callback to parse Netlink reply on filter query attempting to retrieve the
+ * flow counters if present.
+ *
+ * @param nlh
+ * Message received from Netlink.
+ * @param[out] data
+ * pointer to the count statistics to be filled by the routine.
+ *
+ * @return
+ * MNL_CB_ERROR on error, MNL_CB_OK value otherwise.
+ */
+static int
+mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data)
+{
+ struct rte_flow_query_count *qc = (struct rte_flow_query_count *)data;
+
+ switch (nlh->nlmsg_type) {
+ case NLMSG_NOOP:
+ return MNL_CB_OK;
+ case NLMSG_ERROR:
+ case NLMSG_OVERRUN:
+ return MNL_CB_ERROR;
+ default: /* regular payload: try to parse it as a TC filter */
+ break;
+ }
+ return mlx5_nl_flow_parse_filter(nlh, qc);
+}
+
+/**
+ * Query a Netlink flow rule for its statistics.
+ *
+ * @param nl
+ * Libmnl socket to use.
+ * @param fbuf
+ * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
+ * @param rbuf
+ * Buffer for holding Netlink response.
+ * @param[out] qc
+ * Count statistics retrieved by the query.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_nl_flow_query_count(struct mnl_socket *nl,
+ void *fbuf,
+ struct mlx5_nl_rbuf *rbuf,
+ struct rte_flow_query_count *qc,
+ struct rte_flow_error *error)
+{
+ struct nlmsghdr *nlh = fbuf;
+ uint32_t seq = random();
+ ssize_t ret;
+
+ if (qc == NULL) /* set rte_errno/error per the documented contract */
+ return rte_flow_error_set
+ (error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "no storage provided for counter query results");
+ nlh->nlmsg_type = RTM_GETTFILTER;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+ nlh->nlmsg_seq = seq;
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+ goto error_exit;
+ ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+ while (ret > 0) {
+ ret = mnl_cb_run(rbuf->buf, ret, seq,
+ mnl_socket_get_portid(nl),
+ mlx5_nl_flow_parse_message, qc);
+ if (ret <= MNL_CB_STOP) /* MNL_CB_STOP or MNL_CB_ERROR */
+ break;
+ ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+ }
+ /* ret < 0 here means recvfrom() or mnl_cb_run() failed. */
+ if (ret >= 0)
+ return 0;
+error_exit:
+ return rte_flow_error_set
+ (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "netlink: failed to read flow rule statistics");
+}
+
+/**
* Initialize ingress qdisc of a given network interface.
*
* @param nl
@@ -1226,3 +1495,48 @@ struct mnl_socket *
{
mnl_socket_close(nl);
}
+
+/**
+ * Create netlink receive buffer.
+ * Netlink queries may result in a large netlink reply; with a short
+ * receive buffer a reply message could be truncated. To avoid this,
+ * we allocate a buffer of MNL_SOCKET_BUFFER_SIZE (which is system dependent
+ * and usually 8KB long). Using this buffer size ensures that netlink
+ * messages will be stored without truncation.
+ *
+ * @return
+ * pointer to mlx5_nl_rbuf created, NULL value otherwise.
+ */
+struct mlx5_nl_rbuf *
+mlx5_nl_flow_rcv_buf_create(void)
+{
+ struct mlx5_nl_rbuf *rbuf = rte_zmalloc(__func__,
+ sizeof(struct mlx5_nl_rbuf),
+ sizeof(uint32_t));
+ uint8_t *buf = rte_zmalloc(__func__,
+ MNL_SOCKET_BUFFER_SIZE,
+ sizeof(uint32_t));
+ if (!buf || !rbuf) {
+ rte_free(buf); /* rte_free(NULL) is a no-op */
+ rte_free(rbuf);
+ return NULL;
+ }
+ rbuf->buf = buf;
+ rbuf->bsize = MNL_SOCKET_BUFFER_SIZE; /* <= 8KB, fits uint16_t bsize */
+ return rbuf;
+}
+
+/**
+ * Destroy mlx5_nl_rbuf.
+ *
+ * @param rb
+ * The receive buffer to destroy.
+ */
+void
+mlx5_nl_flow_rcv_buf_destroy(struct mlx5_nl_rbuf *rb)
+{
+ if (rb) { /* NULL-safe: callers may pass an unset pointer */
+ rte_free(rb->buf);
+ rte_free(rb);
+ }
+}