@@ -58,6 +58,23 @@ needed, but the interface does not enforce that speed, for example::
--vdev=net_tap0,iface=foo0,speed=25000
+It is possible to specify a remote netdevice to capture packets from by adding
+``remote=foo1``, for example::
+
+ --vdev=net_tap,iface=tap0,remote=foo1
+
+If a ``remote`` is set, then all packets with the tap PMD's local MAC coming
+in on the remote netdevice will be redirected to the tap.
+If the tap is in promiscuous mode, then all packets will be redirected.
+In allmulti mode, all multicast packets will be redirected.
+It is possible to add explicit rte_flow rules on the tap PMD to capture specific
+traffic. For instance, in testpmd, the following rte_flow rule would capture
+packets with the given source MAC address from the remote, and send them to
+the tap RX queue 3::
+
+ testpmd> flow create 0 ingress pattern eth src is 02:03:04:05:06:07 / \
+ end actions queue index 3 / end
+
After the DPDK application is started you can send and receive packets on the
interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
@@ -63,6 +63,7 @@
#define ETH_TAP_IFACE_ARG "iface"
#define ETH_TAP_SPEED_ARG "speed"
+#define ETH_TAP_REMOTE_ARG "remote"
#ifdef IFF_MULTI_QUEUE
#define RTE_PMD_TAP_MAX_QUEUES 16
@@ -77,6 +78,7 @@ static struct rte_vdev_driver pmd_tap_drv;
static const char *valid_arguments[] = {
ETH_TAP_IFACE_ARG,
ETH_TAP_SPEED_ARG,
+ ETH_TAP_REMOTE_ARG,
NULL
};
@@ -435,6 +437,7 @@ tap_dev_close(struct rte_eth_dev *dev __rte_unused)
struct pmd_internals *internals = dev->data->dev_private;
tap_link_set_down(dev);
+ tap_flow_implicit_flush(dev, NULL);
for (i = 0; i < internals->nb_queues; i++) {
if (internals->rxq[i].fd != -1)
@@ -480,6 +483,8 @@ tap_promisc_enable(struct rte_eth_dev *dev)
dev->data->promiscuous = 1;
tap_link_set_flags(pmd, IFF_PROMISC, 1);
+ if (pmd->remote_if_index)
+ tap_flow_implicit_create(pmd, TAP_REMOTE_PROMISC);
}
static void
@@ -489,6 +494,8 @@ tap_promisc_disable(struct rte_eth_dev *dev)
dev->data->promiscuous = 0;
tap_link_set_flags(pmd, IFF_PROMISC, 0);
+ if (pmd->remote_if_index)
+ tap_flow_implicit_destroy(dev, TAP_REMOTE_PROMISC);
}
static void
@@ -498,6 +505,8 @@ tap_allmulti_enable(struct rte_eth_dev *dev)
dev->data->all_multicast = 1;
tap_link_set_flags(pmd, IFF_ALLMULTI, 1);
+ if (pmd->remote_if_index)
+ tap_flow_implicit_create(pmd, TAP_REMOTE_ALLMULTI);
}
static void
@@ -507,6 +516,8 @@ tap_allmulti_disable(struct rte_eth_dev *dev)
dev->data->all_multicast = 0;
tap_link_set_flags(pmd, IFF_ALLMULTI, 0);
+ if (pmd->remote_if_index)
+ tap_flow_implicit_destroy(dev, TAP_REMOTE_ALLMULTI);
}
static void
@@ -632,9 +643,42 @@ tap_setup_queue(struct rte_eth_dev *dev,
pmd->name);
return fd;
}
+ if (pmd->remote_if_index) {
+ /*
+ * Flush usually returns negative value because it tries
+ * to delete every QDISC (and on a running device, one
+ * QDISC at least is needed). Ignore negative return
+ * value.
+ */
+ qdisc_flush(pmd->nlsk_fd, pmd->remote_if_index);
+ if (qdisc_create_ingress(pmd->nlsk_fd,
+ pmd->remote_if_index) < 0)
+ goto remote_fail;
+ LIST_INIT(&pmd->implicit_flows);
+ if (tap_flow_implicit_create(
+ pmd, TAP_REMOTE_LOCAL_MAC) < 0)
+ goto remote_fail;
+ if (tap_flow_implicit_create(
+ pmd, TAP_REMOTE_BROADCAST) < 0)
+ goto remote_fail;
+ if (tap_flow_implicit_create(
+ pmd, TAP_REMOTE_BROADCASTV6) < 0)
+ goto remote_fail;
+ if (tap_flow_implicit_create(
+ pmd, TAP_REMOTE_TX) < 0)
+ goto remote_fail;
+ }
}
return fd;
+
+remote_fail:
+ RTE_LOG(ERR, PMD,
+ "Could not set up remote flow rules for %s: remote disabled.\n",
+ pmd->name);
+ pmd->remote_if_index = 0;
+ tap_flow_implicit_flush(dev, NULL);
+ return fd;
}
static int
@@ -848,7 +892,7 @@ tap_kernel_support(struct pmd_internals *pmd)
}
static int
-eth_dev_tap_create(const char *name, char *tap_name)
+eth_dev_tap_create(const char *name, char *tap_name, char *remote_iface)
{
int numa_node = rte_socket_id();
struct rte_eth_dev *dev = NULL;
@@ -917,6 +961,13 @@ eth_dev_tap_create(const char *name, char *tap_name)
* creating/destroying flow rules.
*/
pmd->nlsk_fd = nl_init();
+ if (strlen(remote_iface)) {
+ pmd->remote_if_index = if_nametoindex(remote_iface);
+ if (!pmd->remote_if_index)
+ RTE_LOG(ERR, PMD, "Could not find %s ifindex: "
+ "remote interface will remain unconfigured\n",
+ remote_iface);
+ }
return 0;
@@ -957,6 +1008,19 @@ set_interface_speed(const char *key __rte_unused,
return 0;
}
+static int
+set_remote_iface(const char *key __rte_unused,
+		 const char *value,
+		 void *extra_args)
+{
+	char *remote_name = extra_args; /* caller's RTE_ETH_NAME_MAX_LEN buffer */
+
+	/* A missing value leaves the caller's buffer untouched. */
+	if (value != NULL)
+		snprintf(remote_name, RTE_ETH_NAME_MAX_LEN, "%s", value);
+	return 0;
+}
+
/* Open a TAP interface device.
*/
static int
@@ -966,10 +1030,12 @@ rte_pmd_tap_probe(const char *name, const char *params)
struct rte_kvargs *kvlist = NULL;
int speed;
char tap_name[RTE_ETH_NAME_MAX_LEN];
+ char remote_iface[RTE_ETH_NAME_MAX_LEN];
speed = ETH_SPEED_NUM_10G;
snprintf(tap_name, sizeof(tap_name), "%s%d",
DEFAULT_TAP_NAME, tap_unit++);
+ memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN);
if (params && (params[0] != '\0')) {
RTE_LOG(DEBUG, PMD, "paramaters (%s)\n", params);
@@ -993,6 +1059,15 @@ rte_pmd_tap_probe(const char *name, const char *params)
if (ret == -1)
goto leave;
}
+
+ if (rte_kvargs_count(kvlist, ETH_TAP_REMOTE_ARG) == 1) {
+ ret = rte_kvargs_process(kvlist,
+ ETH_TAP_REMOTE_ARG,
+ &set_remote_iface,
+ remote_iface);
+ if (ret == -1)
+ goto leave;
+ }
}
}
pmd_link.link_speed = speed;
@@ -1000,7 +1075,7 @@ rte_pmd_tap_probe(const char *name, const char *params)
RTE_LOG(NOTICE, PMD, "Initializing pmd_tap for %s as %s\n",
name, tap_name);
- ret = eth_dev_tap_create(name, tap_name);
+ ret = eth_dev_tap_create(name, tap_name, remote_iface);
leave:
if (ret == -1) {
@@ -1031,6 +1106,7 @@ rte_pmd_tap_remove(const char *name)
return 0;
tap_flow_flush(eth_dev, NULL);
+ tap_flow_implicit_flush(eth_dev, NULL);
internals = eth_dev->data->dev_private;
if (internals->flower_support && internals->nlsk_fd)
@@ -71,6 +71,8 @@ struct pmd_internals {
int flower_support; /* 1 if kernel supports, else 0 */
int flower_vlan_support; /* 1 if kernel supports, else 0 */
LIST_HEAD(tap_flows, rte_flow) flows; /* rte_flow rules */
+ /* implicit rte_flow rules set when a remote device is active */
+ LIST_HEAD(tap_implicit_flows, rte_flow) implicit_flows;
struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES]; /* List of RX queues */
struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES]; /* List of TX queues */
};
@@ -82,6 +82,7 @@ enum {
struct rte_flow {
LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */
+ struct rte_flow *remote_flow; /* associated remote flow */
struct nlmsg msg;
};
@@ -92,6 +93,12 @@ struct convert_data {
struct rte_flow *flow;
};
+struct remote_rule { /* template of one implicit rule, see implicit_rte_flows[] */
+ struct rte_flow_attr attr; /* group, priority and direction of the rule */
+ struct rte_flow_item items[2]; /* one match item followed by the END item */
+ int mirred; /* TCA_EGRESS_REDIR or TCA_EGRESS_MIRROR, given to priv_flow_process() */
+};
+
static int tap_flow_create_eth(const struct rte_flow_item *item, void *data);
static int tap_flow_create_vlan(const struct rte_flow_item *item, void *data);
static int tap_flow_create_ipv4(const struct rte_flow_item *item, void *data);
@@ -249,6 +256,114 @@ static const struct tap_flow_items tap_flow_items[] = {
},
};
+static struct remote_rule implicit_rte_flows[TAP_REMOTE_MAX_IDX] = { /* indexed by enum implicit_rule_index */
+ [TAP_REMOTE_LOCAL_MAC] = { /* full dst MAC mask; spec is supplied by tap_flow_implicit_create() */
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_REMOTE_LOCAL_MAC,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_REDIR, /* the only rule that redirects instead of mirroring */
+ },
+ [TAP_REMOTE_BROADCAST] = { /* dst MAC ff:ff:ff:ff:ff:ff */
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_REMOTE_BROADCAST,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ .spec = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_MIRROR,
+ },
+ [TAP_REMOTE_BROADCASTV6] = { /* dst MAC compared with 33:33:00:00:00:00, which is also the mask */
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_REMOTE_BROADCASTV6,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+ },
+ .spec = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+ },
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_MIRROR,
+ },
+ [TAP_REMOTE_PROMISC] = { /* pattern is a lone VOID item */
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_REMOTE_PROMISC,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_VOID,
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_MIRROR,
+ },
+ [TAP_REMOTE_ALLMULTI] = { /* dst MAC compared on the single 01:00:00:00:00:00 bit */
+ .attr = {
+ .group = MAX_GROUP,
+ .priority = PRIORITY_MASK - TAP_REMOTE_ALLMULTI,
+ .ingress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .mask = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ },
+ .spec = &(const struct rte_flow_item_eth){
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ },
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_MIRROR,
+ },
+ [TAP_REMOTE_TX] = { /* the only egress entry: group 0, priority is the index itself */
+ .attr = {
+ .group = 0,
+ .priority = TAP_REMOTE_TX,
+ .egress = 1,
+ },
+ .items[0] = {
+ .type = RTE_FLOW_ITEM_TYPE_VOID,
+ },
+ .items[1] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ .mirred = TCA_EGRESS_MIRROR,
+ },
+};
+
/**
* Make as much checks as possible on an Ethernet item, and if a flow is
* provided, fill it appropriately with Ethernet info.
@@ -673,6 +788,47 @@ add_action_gact(struct rte_flow *flow, int action)
}
/**
+ * Transform a MIRRED action item in the provided flow for TC.
+ *
+ * @param[in, out] flow
+ * Flow to be filled.
+ * @param[in] ifindex
+ * Netdevice ifindex, where to mirror/redirect packet to.
+ * @param[in] action_type
+ * Either TCA_EGRESS_REDIR for redirection or TCA_EGRESS_MIRROR for mirroring.
+ *
+ * @return
+ * 0 if checks are alright, -1 otherwise.
+ */
+static int
+add_action_mirred(struct rte_flow *flow, uint16_t ifindex, uint16_t action_type)
+{
+	const size_t first_act = 1; /* index of the only action in the list */
+	struct nlmsg *nl = &flow->msg;
+	/* Mirror => TC_ACT_PIPE, otherwise (redirect) => TC_ACT_STOLEN. */
+	struct tc_mirred parms = {
+		.action = (action_type == TCA_EGRESS_MIRROR) ?
+			  TC_ACT_PIPE : TC_ACT_STOLEN,
+		.eaction = action_type,
+		.ifindex = ifindex,
+	};
+
+	/* Open the three nesting levels: action list, action slot, options. */
+	if (nlattr_nested_start(nl, TCA_FLOWER_ACT) < 0)
+		return -1;
+	if (nlattr_nested_start(nl, first_act) < 0)
+		return -1;
+	nlattr_add(&nl->nh, TCA_ACT_KIND, sizeof("mirred"), "mirred");
+	if (nlattr_nested_start(nl, TCA_ACT_OPTIONS) < 0)
+		return -1;
+	nlattr_add(&nl->nh, TCA_MIRRED_PARMS, sizeof(parms), &parms);
+	nlattr_nested_finish(nl); /* closes TCA_ACT_OPTIONS */
+	nlattr_nested_finish(nl); /* closes the action slot */
+	nlattr_nested_finish(nl); /* closes TCA_FLOWER_ACT */
+	return 0;
+}
+
+/**
* Transform a QUEUE action item in the provided flow for TC.
*
* @param[in, out] flow
@@ -723,6 +879,15 @@ add_action_skbedit(struct rte_flow *flow, uint16_t queue)
* Perform verbose error reporting if not NULL.
* @param[in, out] flow
* Flow structure to update.
+ * @param[in] mirred
+ * If set to TCA_EGRESS_REDIR, provided actions will be replaced with a
+ * redirection to the tap netdevice, and the TC rule will be configured
+ * on the remote netdevice in pmd.
+ * If set to TCA_EGRESS_MIRROR, provided actions will be replaced with a
+ * mirroring to the tap netdevice, and the TC rule will be configured
+ * on the remote netdevice in pmd. Matching packets will thus be duplicated.
+ * If set to 0, the standard behavior is to be used: set correct actions for
+ * the TC rule, and apply it on the tap netdevice.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
@@ -733,7 +898,8 @@ priv_flow_process(struct pmd_internals *pmd,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error,
- struct rte_flow *flow)
+ struct rte_flow *flow,
+ int mirred)
{
const struct tap_flow_items *cur_item = tap_flow_items;
struct convert_data data = {
@@ -760,15 +926,21 @@ priv_flow_process(struct pmd_internals *pmd,
flow->msg.t.tcm_info = TC_H_MAKE(prio << 16,
flow->msg.t.tcm_info);
}
- if (!attr->ingress) {
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR,
- NULL, "direction should be ingress");
- return -rte_errno;
- }
- /* rte_flow ingress is actually egress as seen in the kernel */
- if (attr->ingress && flow)
- flow->msg.t.tcm_parent = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
if (flow) {
+ if (mirred) {
+ /*
+ * If attr->ingress, the rule applies on remote ingress
+ * to match incoming packets
+ * If attr->egress, the rule applies on tap ingress (as
+ * seen from the kernel) to deal with packets going out
+ * from the DPDK app.
+ */
+ flow->msg.t.tcm_parent = TC_H_MAKE(TC_H_INGRESS, 0);
+ } else {
+ /* Standard rule on tap egress (kernel standpoint). */
+ flow->msg.t.tcm_parent =
+ TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);
+ }
/* use flower filter type */
nlattr_add(&flow->msg.nh, TCA_KIND, sizeof("flower"), "flower");
if (nlattr_nested_start(&flow->msg, TCA_OPTIONS) < 0)
@@ -821,6 +993,22 @@ priv_flow_process(struct pmd_internals *pmd,
data.eth_type);
}
}
+ if (mirred && flow) {
+ uint16_t if_index = pmd->if_index;
+
+ /*
+ * If attr->egress && mirred, then this is a special
+ * case where the rule must be applied on the tap, to
+ * redirect packets coming from the DPDK App, out
+ * through the remote netdevice.
+ */
+ if (attr->egress)
+ if_index = pmd->remote_if_index;
+ if (add_action_mirred(flow, if_index, mirred) < 0)
+ goto exit_action_not_supported;
+ else
+ goto end;
+ }
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
int err = 0;
@@ -855,6 +1043,7 @@ priv_flow_process(struct pmd_internals *pmd,
if (err)
goto exit_action_not_supported;
}
+end:
if (flow)
nlattr_nested_finish(&flow->msg); /* nested TCA_OPTIONS */
return 0;
@@ -885,7 +1074,7 @@ tap_flow_validate(struct rte_eth_dev *dev,
{
struct pmd_internals *pmd = dev->data->dev_private;
- return priv_flow_process(pmd, attr, items, actions, error, NULL);
+ return priv_flow_process(pmd, attr, items, actions, error, NULL, 0);
}
/**
@@ -941,6 +1130,7 @@ tap_flow_create(struct rte_eth_dev *dev,
struct rte_flow_error *error)
{
struct pmd_internals *pmd = dev->data->dev_private;
+ struct rte_flow *remote_flow = NULL;
struct rte_flow *flow = NULL;
struct nlmsg *msg = NULL;
int err;
@@ -951,6 +1141,17 @@ tap_flow_create(struct rte_eth_dev *dev,
"can't create rule, ifindex not found");
goto fail;
}
+ /*
+ * No rules configured through standard rte_flow should be set on the
+ * priorities used by implicit rules.
+ */
+ if ((attr->group == MAX_GROUP) &&
+ attr->priority > (MAX_PRIORITY - TAP_REMOTE_MAX_IDX)) {
+ rte_flow_error_set(
+ error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ NULL, "priority value too big");
+ goto fail;
+ }
flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
if (!flow) {
rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
@@ -962,7 +1163,7 @@ tap_flow_create(struct rte_eth_dev *dev,
NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
tap_flow_set_handle(flow);
- if (priv_flow_process(pmd, attr, items, actions, error, flow))
+ if (priv_flow_process(pmd, attr, items, actions, error, flow, 0))
goto fail;
err = nl_send(pmd->nlsk_fd, &msg->nh);
if (err < 0) {
@@ -977,14 +1178,183 @@ tap_flow_create(struct rte_eth_dev *dev,
goto fail;
}
LIST_INSERT_HEAD(&pmd->flows, flow, next);
+ /**
+ * If a remote device is configured, a TC rule with identical items for
+ * matching must be set on that device, with a single action: redirect
+ * to the local pmd->if_index.
+ */
+ if (pmd->remote_if_index) {
+ remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
+ if (!remote_flow) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+ "cannot allocate memory for rte_flow");
+ goto fail;
+ }
+ msg = &remote_flow->msg;
+ /* set the rule if_index for the remote netdevice */
+ tc_init_msg(
+ msg, pmd->remote_if_index, RTM_NEWTFILTER,
+ NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+ msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
+ tap_flow_set_handle(remote_flow);
+ if (priv_flow_process(pmd, attr, items, NULL,
+ error, remote_flow, TCA_EGRESS_REDIR)) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "rte flow rule validation failed");
+ goto fail;
+ }
+ err = nl_send(pmd->nlsk_fd, &msg->nh);
+ if (err < 0) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "Failure sending nl request");
+ goto fail;
+ }
+ err = nl_recv_ack(pmd->nlsk_fd);
+ if (err < 0) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "overlapping rules");
+ goto fail;
+ }
+ flow->remote_flow = remote_flow;
+ }
return flow;
fail:
+ if (remote_flow)
+ rte_free(remote_flow);
if (flow)
rte_free(flow);
return NULL;
}
/**
+ * Add an implicit flow rule on the remote device to make sure traffic gets to
+ * the tap netdevice from there.
+ *
+ * @param pmd
+ * Pointer to private structure.
+ * @param[in] idx
+ * The idx in the implicit_rte_flows array specifying which rule to apply.
+ *
+ * @return -1 if the rule couldn't be applied, 0 otherwise.
+ */
+int tap_flow_implicit_create(struct pmd_internals *pmd,
+			     enum implicit_rule_index idx)
+{
+	struct rte_flow_item *items = implicit_rte_flows[idx].items;
+	struct rte_flow_attr *attr = &implicit_rte_flows[idx].attr;
+	struct rte_flow_item_eth eth_local = { .type = 0 };
+	uint16_t if_index = pmd->remote_if_index;
+	struct rte_flow *remote_flow = NULL;
+	struct nlmsg *msg = NULL;
+	struct rte_flow_item items_local[2] = {
+		[0] = {
+			.type = items[0].type,
+			.spec = &eth_local,
+			.mask = items[0].mask,
+		},
+		[1] = {
+			.type = items[1].type,
+		}
+	};
+
+	remote_flow = rte_malloc(__func__, sizeof(struct rte_flow), 0);
+	if (!remote_flow) {
+		RTE_LOG(ERR, PMD, "Cannot allocate memory for rte_flow\n");
+		goto fail;
+	}
+	/*
+	 * rte_malloc() does not clear memory, and tap_flow_destroy() treats a
+	 * non-NULL remote_flow member as a companion rule to delete and free.
+	 */
+	remote_flow->remote_flow = NULL;
+	msg = &remote_flow->msg;
+	if (idx == TAP_REMOTE_TX) {
+		if_index = pmd->if_index;
+	} else if (idx == TAP_REMOTE_LOCAL_MAC) {
+		/*
+		 * eth addr couldn't be set in implicit_rte_flows[] as it is not
+		 * known at compile time.
+		 */
+		memcpy(&eth_local.dst, &pmd->eth_addr, sizeof(pmd->eth_addr));
+		items = items_local;
+	}
+	tc_init_msg(msg, if_index, RTM_NEWTFILTER,
+		    NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);
+	msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));
+	tap_flow_set_handle(remote_flow);
+	if (priv_flow_process(pmd, attr, items, NULL, NULL,
+			      remote_flow, implicit_rte_flows[idx].mirred)) {
+		RTE_LOG(ERR, PMD, "rte flow rule validation failed\n");
+		goto fail;
+	}
+	if (nl_send(pmd->nlsk_fd, &msg->nh) < 0) {
+		RTE_LOG(ERR, PMD, "Failure sending nl request\n");
+		goto fail;
+	}
+	if (nl_recv_ack(pmd->nlsk_fd) < 0) {
+		RTE_LOG(ERR, PMD, "Kernel refused TC filter rule creation\n");
+		goto fail;
+	}
+	LIST_INSERT_HEAD(&pmd->implicit_flows, remote_flow, next);
+	return 0;
+fail:
+	rte_free(remote_flow); /* rte_free(NULL) is a no-op */
+	return -1;
+}
+
+/**
+ * Remove one implicit flow rule, looked up by its TC priority.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device; its private data holds the implicit list.
+ * @param[in] idx
+ *   Index in implicit_rte_flows[] of the rule to remove.
+ *
+ * @return 0 when no rule matches, tap_flow_destroy() result otherwise.
+ */
+int tap_flow_implicit_destroy(struct rte_eth_dev *dev,
+			      enum implicit_rule_index idx)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int target = implicit_rte_flows[idx].attr.priority + PRIORITY_OFFSET;
+	struct rte_flow *rule = LIST_FIRST(&pmd->implicit_flows);
+
+	/* The priority is read back from the upper 16 bits of tcm_info. */
+	while (rule) {
+		int prio = (rule->msg.t.tcm_info >> 16) & PRIORITY_MASK;
+
+		if (prio == target)
+			return tap_flow_destroy(dev, rule, NULL);
+		rule = LIST_NEXT(rule, next);
+	}
+	/* Nothing installed for this index: not an error. */
+	return 0;
+}
+
+/**
+ * Destroy all implicit flows.
+ *
+ * @see rte_flow_flush()
+ */
+int
+tap_flow_implicit_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rte_flow *head;
+
+	/* tap_flow_destroy() unlinks its argument, so keep popping the head. */
+	while ((head = LIST_FIRST(&pmd->implicit_flows)) != NULL) {
+		if (tap_flow_destroy(dev, head, error) < 0)
+			return -1; /* give up on the first failure */
+	}
+	return 0;
+}
+
+/**
* Destroy a flow.
*
* @see rte_flow_destroy()
@@ -996,6 +1366,7 @@ tap_flow_destroy(struct rte_eth_dev *dev,
struct rte_flow_error *error)
{
struct pmd_internals *pmd = dev->data->dev_private;
+ struct rte_flow *remote_flow = flow->remote_flow;
int ret = 0;
LIST_REMOVE(flow, next);
@@ -1009,11 +1380,34 @@ tap_flow_destroy(struct rte_eth_dev *dev,
goto end;
}
ret = nl_recv_ack(pmd->nlsk_fd);
- if (ret < 0)
+ if (ret < 0) {
rte_flow_error_set(
error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
"couldn't receive kernel ack to our request");
+ goto end;
+ }
+ if (remote_flow) {
+ remote_flow->msg.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ remote_flow->msg.nh.nlmsg_type = RTM_DELTFILTER;
+
+ ret = nl_send(pmd->nlsk_fd, &remote_flow->msg.nh);
+ if (ret < 0) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "Failure sending nl request");
+ goto end;
+ }
+ ret = nl_recv_ack(pmd->nlsk_fd);
+ if (ret < 0) {
+ rte_flow_error_set(
+ error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "Failure trying to receive nl ack");
+ goto end;
+ }
+ }
end:
+ if (remote_flow)
+ rte_free(remote_flow);
rte_free(flow);
return ret;
}
@@ -36,6 +36,7 @@
#include <rte_flow.h>
#include <rte_flow_driver.h>
+#include <tap.h>
/**
* In TC, priority 0 means we require the kernel to allocate one for us.
@@ -49,10 +50,33 @@
#define GROUP_SHIFT 12
#define MAX_GROUP GROUP_MASK
+/**
+ * Indexes of the implicit rules, listed in reverse priority order: an ingress
+ * rule's priority is computed by subtracting its index from the lowest
+ * priority (PRIORITY_MASK). The first entries therefore end up with the
+ * lowest priority (but the biggest priority value).
+ */
+enum implicit_rule_index {
+ TAP_REMOTE_TX, /* egress rule; its priority is the index itself */
+ TAP_REMOTE_BROADCASTV6, /* dst MAC compared with 33:33:00:00:00:00 */
+ TAP_REMOTE_BROADCAST, /* dst MAC ff:ff:ff:ff:ff:ff */
+ TAP_REMOTE_ALLMULTI, /* created/destroyed by tap_allmulti_enable/disable */
+ TAP_REMOTE_PROMISC, /* created/destroyed by tap_promisc_enable/disable */
+ TAP_REMOTE_LOCAL_MAC, /* dst MAC equal to the tap PMD's own address */
+ TAP_REMOTE_MAX_IDX, /* number of entries; sizes implicit_rte_flows[] */
+};
+
int tap_dev_filter_ctrl(struct rte_eth_dev *dev,
enum rte_filter_type filter_type,
enum rte_filter_op filter_op,
void *arg);
int tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error);
+int tap_flow_implicit_create(struct pmd_internals *pmd,
+ enum implicit_rule_index idx);
+int tap_flow_implicit_destroy(struct rte_eth_dev *dev,
+ enum implicit_rule_index idx);
+int tap_flow_implicit_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error);
+
#endif /* _TAP_FLOW_H_ */