[dpdk-dev] [PATCH v3 1/2] Add RIB library
Vladimir Medvedkin
medvedkinv at gmail.com
Sun Mar 25 20:35:35 CEST 2018
2018-03-15 17:27 GMT+03:00 Bruce Richardson <bruce.richardson at intel.com>:
> On Thu, Feb 22, 2018 at 10:50:55PM +0000, Medvedkin Vladimir wrote:
> > RIB is an alternative to current LPM library.
> > It solves the following problems
> > - Increases the speed of control plane operations against lpm such as
> > adding/deleting routes
> > - Adds abstraction from dataplane algorithms, so it is possible to add
> > different ip route lookup algorithms such as DXR/poptrie/lpc-trie/etc
> > in addition to current dir24_8
> > - It is possible to keep user defined application specific additional
> > information in struct rte_rib_node which represents route entry.
> > It can be next hop/set of next hops (i.e. active and feasible),
> > pointers to link rte_rib_node based on some criteria (i.e. next_hop),
> > plenty of additional control plane information.
> > - For dir24_8 implementation it is possible to remove
> > rte_lpm_tbl_entry.depth field that helps to save 6 bits.
> > - Also new dir24_8 implementation supports different next_hop sizes
> > (1/2/4/8 bytes per next hop)
> > - Removed RTE_LPM_LOOKUP_SUCCESS to save 1 bit and to eliminate
> > ternary operator.
> > Instead it returns special default value if there is no route.
> >
> > Signed-off-by: Medvedkin Vladimir <medvedkinv at gmail.com>
>
> More comments inline below. Mostly for rte_rib.c file.
>
> /Bruce
>
> > ---
> > config/common_base | 6 +
> > doc/api/doxy-api.conf | 1 +
> > lib/Makefile | 2 +
> > lib/librte_rib/Makefile | 23 ++
>
> Don't forget meson.build file too, to build with meson and ninja. [Strongly
> recommend it for your day-to-day development work too, incremental builds
> are much, much faster using ninja].
>
Will add
>
> > lib/librte_rib/rte_dir24_8.c | 482 ++++++++++++++++++++++++++++++
> +++
> > lib/librte_rib/rte_dir24_8.h | 115 ++++++++
> > lib/librte_rib/rte_rib.c | 526 ++++++++++++++++++++++++++++++
> +++++++
> > lib/librte_rib/rte_rib.h | 322 +++++++++++++++++++++++
> > lib/librte_rib/rte_rib_version.map | 18 ++
> > mk/rte.app.mk | 1 +
> > 10 files changed, 1496 insertions(+)
> > create mode 100644 lib/librte_rib/Makefile
> > create mode 100644 lib/librte_rib/rte_dir24_8.c
> > create mode 100644 lib/librte_rib/rte_dir24_8.h
> > create mode 100644 lib/librte_rib/rte_rib.c
> > create mode 100644 lib/librte_rib/rte_rib.h
> > create mode 100644 lib/librte_rib/rte_rib_version.map
> >
> > diff --git a/config/common_base b/config/common_base
> > index ad03cf4..aceeff5 100644
> > --- a/config/common_base
> > +++ b/config/common_base
> > @@ -679,6 +679,12 @@ CONFIG_RTE_LIBRTE_LPM=y
> > CONFIG_RTE_LIBRTE_LPM_DEBUG=n
> >
> > #
> > +# Compile librte_rib
> > +#
> > +CONFIG_RTE_LIBRTE_RIB=y
> > +CONFIG_RTE_LIBRTE_RIB_DEBUG=n
> > +
> > +#
> > # Compile librte_acl
> > #
> > CONFIG_RTE_LIBRTE_ACL=y
> > diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
> > index cda52fd..8e4f969 100644
> > --- a/doc/api/doxy-api.conf
> > +++ b/doc/api/doxy-api.conf
> > @@ -60,6 +60,7 @@ INPUT = doc/api/doxy-api-index.md \
> > lib/librte_kvargs \
> > lib/librte_latencystats \
> > lib/librte_lpm \
> > + lib/librte_rib \
> > lib/librte_mbuf \
> > lib/librte_member \
> > lib/librte_mempool \
> > diff --git a/lib/Makefile b/lib/Makefile
> > index ec965a6..e4faf10 100644
> > --- a/lib/Makefile
> > +++ b/lib/Makefile
> > @@ -43,6 +43,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
> > DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
> > DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
> > DEPDIRS-librte_lpm := librte_eal
> > +DIRS-$(CONFIG_RTE_LIBRTE_RIB) += librte_rib
> > +DEPDIRS-librte_rib := librte_eal librte_mempool
> > DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
> > DEPDIRS-librte_acl := librte_eal
> > DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
> > diff --git a/lib/librte_rib/Makefile b/lib/librte_rib/Makefile
> > new file mode 100644
> > index 0000000..f6431b6
> > --- /dev/null
> > +++ b/lib/librte_rib/Makefile
> > @@ -0,0 +1,23 @@
> > +# SPDX-License-Identifier: BSD-3-Clause
> > +# Copyright(c) 2018 Vladimir Medvedkin <medvedkinv at gmail.com>
> > +
> > +include $(RTE_SDK)/mk/rte.vars.mk
> > +
> > +# library name
> > +LIB = librte_rib.a
> > +
> > +CFLAGS += -O3
> > +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> > +LDLIBS += -lrte_eal -lrte_mempool
> > +
> > +EXPORT_MAP := rte_rib_version.map
> > +
> > +LIBABIVER := 1
> > +
> > +# all source are stored in SRCS-y
> > +SRCS-$(CONFIG_RTE_LIBRTE_RIB) := rte_rib.c rte_dir24_8.c
> > +
> > +# install this header file
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_RIB)-include := rte_rib.h rte_dir24_8.h
> > +
> > +include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_rib/rte_dir24_8.c b/lib/librte_rib/rte_dir24_8.c
> > new file mode 100644
> > index 0000000..2fc55fe
> > --- /dev/null
> > +++ b/lib/librte_rib/rte_dir24_8.c
>
> For future patches, it would be good if you can split the dir24_8 code into
> a separate patch from the main rib code. The more you can split it into
> separate patch blocks, the easier it is to review.
>
Ok
>
> > @@ -0,0 +1,482 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv at gmail.com>
> > + */
>
> <snip>
>
> > diff --git a/lib/librte_rib/rte_rib.c b/lib/librte_rib/rte_rib.c
> > new file mode 100644
> > index 0000000..7783b23
> > --- /dev/null
> > +++ b/lib/librte_rib/rte_rib.c
> > @@ -0,0 +1,526 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv at gmail.com>
> > + */
> > +
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <stdio.h>
> > +#include <string.h>
> > +#include <sys/queue.h>
> > +
> > +#include <rte_eal.h>
> > +#include <rte_eal_memconfig.h>
> > +#include <rte_common.h>
> > +#include <rte_tailq.h>
> > +#include <rte_errno.h>
> > +#include <rte_rwlock.h>
> > +#include <rte_memory.h>
> > +#include <rte_memzone.h>
> > +#include <rte_mempool.h>
> > +#include <rte_malloc.h>
> > +#include <rte_log.h>
> > +
> > +#include <rte_rib.h>
> > +#include <rte_dir24_8.h>
> > +
> > +TAILQ_HEAD(rte_rib_list, rte_tailq_entry);
> > +static struct rte_tailq_elem rte_rib_tailq = {
> > + .name = "RTE_RIB",
> > +};
> > +EAL_REGISTER_TAILQ(rte_rib_tailq)
> > +
> > +static struct rte_rib_node *
> > +new_node_malloc(struct rte_rib *rib)
> > +{
> > + struct rte_rib_node *ent;
> > +
> > + ent = malloc(rib->node_sz);
> > + if (unlikely(ent == NULL))
> > + return NULL;
> > + ++rib->cur_nodes;
> > + return ent;
> > +}
> > +
> > +static void
> > +free_node_malloc(__rte_unused struct rte_rib *rib, struct rte_rib_node
> *ent)
> > +{
> > + --rib->cur_nodes;
> > + free(ent);
> > +}
> > +
> > +static struct rte_rib_node *
> > +new_node_mempool(struct rte_rib *rib)
> > +{
> > + struct rte_rib_node *ent;
> > + int ret;
> > +
> > + ret = rte_mempool_get(rib->node_pool, (void *)&ent);
> > + if (unlikely(ret != 0))
> > + return NULL;
> > + ++rib->cur_nodes;
> > + return ent;
> > +}
> > +
> > +static void
> > +free_node_mempool(struct rte_rib *rib, struct rte_rib_node *ent)
> > +{
> > + --rib->cur_nodes;
> > + rte_mempool_put(rib->node_pool, ent);
> > +}
> > +
> > +struct rte_rib_node *
> > +rte_rib_tree_lookup(struct rte_rib *rib, uint32_t key)
> > +{
> > + struct rte_rib_node *cur = rib->trie;
> > + struct rte_rib_node *prev = NULL;
> > +
> > + while ((cur != NULL) && (((cur->key ^ key) &
> > + (uint32_t)(UINT64_MAX << (32 - cur->depth))) == 0)) {
> > + if ((cur->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE)
> > + prev = cur;
> > + cur = RTE_RIB_GET_NXT_NODE(cur, key);
> > + }
> > + return prev;
> > +}
> > +
> > +struct rte_rib_node *
> > +rte_rib_tree_lookup_parent(struct rte_rib_node *ent)
> > +{
> > + struct rte_rib_node *tmp;
> > +
> > + if (ent == NULL)
> > + return NULL;
> > + tmp = ent->parent;
> > + while ((tmp != NULL) &&
> > + (tmp->flag & RTE_RIB_VALID_NODE) != RTE_RIB_VALID_NODE) {
> > + tmp = tmp->parent;
> > +}
> Watch indentation here. The close brace obviously needs an indent, and the
> line continuation of while should have different indent to body. Coding
> standards suggest double indent on the continuation, ie. two tabs before
> "(tmp->flag".
>
Ah, I missed it, thanks
>
> > + return tmp;
> > +}
> > +
> > +struct rte_rib_node *
> > +rte_rib_tree_lookup_exact(struct rte_rib *rib, uint32_t key, uint8_t
> depth)
> > +{
> > + struct rte_rib_node *cur = rib->trie;
> > +
> > + key &= (uint32_t)(UINT64_MAX << (32 - depth));
> This mask generation seems a common idiom. Maybe make an inline fn or macro
> out of it.
>
Got it
>
> > + while (cur != NULL) {
> > + if ((cur->key == key) && (cur->depth == depth) &&
> > + (cur->flag & RTE_RIB_VALID_NODE))
> > + return cur;
> > + if ((cur->depth > depth) ||
> > + (((uint64_t)key >> (32 - cur->depth)) !=
> > + ((uint64_t)cur->key >> (32 - cur->depth))))
> > + break;
> > + cur = RTE_RIB_GET_NXT_NODE(cur, key);
> > + }
> > + return NULL;
> > +}
> > +
> > +struct rte_rib_node *
> > +rte_rib_tree_get_nxt(struct rte_rib *rib, uint32_t key,
> > + uint8_t depth, struct rte_rib_node *cur, int flag)
> > +{
> > + struct rte_rib_node *tmp, *prev = NULL;
> > +
> > + if (cur == NULL) {
> > + tmp = rib->trie;
> > + while ((tmp) && (tmp->depth < depth))
> > + tmp = RTE_RIB_GET_NXT_NODE(tmp, key);
> > + } else {
> > + tmp = cur;
> > + while ((tmp->parent != NULL) &&
> (RTE_RIB_IS_RIGHT_NODE(tmp) ||
> > + (tmp->parent->right == NULL))) {
> > + tmp = tmp->parent;
> > + if ((tmp->flag & RTE_RIB_VALID_NODE) &&
> > + (RTE_RIB_IS_COVERED(tmp->key, tmp->depth,
> > + key, depth)))
> > + return tmp;
> > + }
> > + tmp = (tmp->parent) ? tmp->parent->right : NULL;
> > + }
> > + while (tmp) {
> > + if ((tmp->flag & RTE_RIB_VALID_NODE) &&
> > + (RTE_RIB_IS_COVERED(tmp->key, tmp->depth,
> > + key, depth))) {
> > + prev = tmp;
> > + if (flag == RTE_RIB_GET_NXT_COVER)
> > + return prev;
> > + }
> > + tmp = (tmp->left) ? tmp->left : tmp->right;
> > + }
> > + return prev;
> > +}
>
> I think this function could do with some comments explaining the logic
> behind it.
>
This function traverses a subtree and retrieves the more-specific routes for the
prefix given by the key/depth arguments (treat that prefix as the top of the
subtree). The traversal avoids recursion by walking parent/child pointers instead
of keeping an explicit stack. The *cur argument acts as a pointer to the last
returned node, so retrieval can resume from just after that node.
> > +
> > +void
> > +rte_rib_tree_remove(struct rte_rib *rib, uint32_t key, uint8_t depth)
> > +{
> > + struct rte_rib_node *cur, *prev, *child;
> > +
> > + cur = rte_rib_tree_lookup_exact(rib, key, depth);
> > + if (cur == NULL)
> > + return;
> > +
> > + --rib->cur_routes;
> > + cur->flag &= ~RTE_RIB_VALID_NODE;
> > + while ((cur->flag & RTE_RIB_VALID_NODE) != RTE_RIB_VALID_NODE) {
> > + if ((cur->left != NULL) && (cur->right != NULL))
> > + return;
> > + child = (cur->left == NULL) ? cur->right : cur->left;
> > + if (child != NULL)
> > + child->parent = cur->parent;
> > + if (cur->parent == NULL) {
> > + rib->trie = child;
> > + rib->free_node(rib, cur);
> > + return;
> > + }
> > + if (cur->parent->left == cur)
> > + cur->parent->left = child;
> > + else
> > + cur->parent->right = child;
> > + prev = cur;
> > + cur = cur->parent;
> > + rib->free_node(rib, prev);
> > + }
> > +}
> > +
> > +struct rte_rib_node *
> > +rte_rib_tree_insert(struct rte_rib *rib, uint32_t key, uint8_t depth)
> > +{
> > + struct rte_rib_node **tmp = &rib->trie;
> > + struct rte_rib_node *prev = NULL;
> > + struct rte_rib_node *new_node = NULL;
> > + struct rte_rib_node *common_node = NULL;
> > + int i = 0;
> > + uint32_t common_prefix;
> > + uint8_t common_depth;
> > +
> > + if (depth > 32) {
> > + rte_errno = EINVAL;
> > + return NULL;
> > + }
> > +
> > + key &= (uint32_t)(UINT64_MAX << (32 - depth));
> > + new_node = rte_rib_tree_lookup_exact(rib, key, depth);
> > + if (new_node != NULL) {
> > + rte_errno = EEXIST;
> > + return NULL;
> > + }
> > +
> > + new_node = rib->alloc_node(rib);
> > + if (new_node == NULL) {
> > + rte_errno = ENOMEM;
> > + return NULL;
> > + }
> > + new_node->left = NULL;
> > + new_node->right = NULL;
> > + new_node->parent = NULL;
> > + new_node->key = key;
> > + new_node->depth = depth;
> > + new_node->flag = RTE_RIB_VALID_NODE;
> > +
> > + while (1) {
> > + if (*tmp == NULL) {
> > + *tmp = new_node;
> > + new_node->parent = prev;
> > + }
>
> I think it would be clearer to have a return in this block, rather than
> having fallthrough to the next one.
>
Ok
>
> > + if ((key == (*tmp)->key) && (depth == (*tmp)->depth)) {
> > + if (new_node != *tmp) {
> > + rib->free_node(rib, new_node);
> > + (*tmp)->flag |= RTE_RIB_VALID_NODE;
> > + }
> > + ++rib->cur_routes;
> > + return *tmp;
> > + }
>
> Comment in the above block please. My understanding is that the previous
> search just confirmed that there wasn't already a valid entry present for
> this new item, but did not report if there was already an invalid entry,
> which is what this block is matching.
>
Right.
In this while loop it traverses down the tree to find the matching node (or the
closest match).
In the block above, if a node matches the search criteria ("if ((key ==
(*tmp)->key) && (depth == (*tmp)->depth))"),
it means a node with the proper key/depth was already inserted (as an
intermediate node) but without the RTE_RIB_VALID_NODE flag set (because
rte_rib_tree_lookup_exact returned NULL). So it frees the newly allocated node
(new_node), sets the flag and so on. The only case where *tmp and new_node can be
equal ("if (new_node != *tmp)") is the fallthrough from the case above
("if (*tmp == NULL)"). But if there is a return there (as you mentioned
earlier), it becomes possible to eliminate the if statement
("if (new_node != *tmp)").
>
> > + i = (*tmp)->depth;
>
> I think "d" might be a better choice of name here, rather than "i", which
> you expect to be a loop variable.
>
got it
>
> > + if ((i >= depth) || (((uint64_t)key >> (32 - i)) !=
> > + ((uint64_t)(*tmp)->key >> (32 - i))))
> > + break;
> > + prev = *tmp;
> > + tmp = (key & (1 << (31 - i))) ? &(*tmp)->right :
> &(*tmp)->left;
> > + }
> > + common_depth = RTE_MIN(depth, (*tmp)->depth);
> > + common_prefix = key ^ (*tmp)->key;
> > + i = __builtin_clz(common_prefix);
> > +
> > + common_depth = RTE_MIN(i, common_depth);
> > + common_prefix = key & (uint32_t)(UINT64_MAX << (32 -
> common_depth));
> > + if ((common_prefix == key) && (common_depth == depth)) {
> > + if ((*tmp)->key & (1 << (31 - depth)))
> > + new_node->right = *tmp;
> > + else
> > + new_node->left = *tmp;
> > + new_node->parent = (*tmp)->parent;
> > + (*tmp)->parent = new_node;
> > + *tmp = new_node;
> > + } else {
> > + common_node = rib->alloc_node(rib);
> > + if (common_node == NULL) {
> > + rib->free_node(rib, new_node);
> > + rte_errno = ENOMEM;
> > + return NULL;
> > + }
> > + common_node->key = common_prefix;
> > + common_node->depth = common_depth;
> > + common_node->flag = 0;
> > + common_node->parent = (*tmp)->parent;
> > + new_node->parent = common_node;
> > + (*tmp)->parent = common_node;
> > + if ((new_node->key & (1 << (31 - common_depth))) == 0) {
> > + common_node->left = new_node;
> > + common_node->right = *tmp;
> > + } else {
> > + common_node->left = *tmp;
> > + common_node->right = new_node;
> > + }
> > + *tmp = common_node;
> > + }
>
> Again some commenting of the logic here would help the reader.
After the closest matching prefix is found in the while loop, the code computes
the key and prefix length common to the given key/depth and the closest match
found.
It then inserts the new node either as the parent of the found prefix (when the
if statement "((common_prefix == key) && (common_depth == depth))" is true) or,
otherwise, creates an intermediate common node and attaches both nodes as its
children.
> > + ++rib->cur_routes;
> > + return new_node;
> > +}
> > +
> > +struct rte_rib *
> > +rte_rib_create(const char *name, int socket_id, struct rte_rib_conf
> *conf)
> > +{
> > + char mem_name[RTE_RIB_NAMESIZE];
> > + struct rte_rib *rib = NULL;
> > + struct rte_tailq_entry *te;
> > + struct rte_rib_list *rib_list;
> > + struct rte_mempool *node_pool = NULL;
> > + enum rte_dir24_8_nh_sz dir24_8_nh_size;
> > +
> > + /* Check user arguments. */
> > + if ((name == NULL) || (conf == NULL) || (socket_id < -1) ||
> > + (conf->type >= RTE_RIB_TYPE_MAX) ||
> > + (conf->alloc_type >= RTE_RIB_ALLOC_MAX) ||
> > + (conf->max_nodes == 0) ||
> > + (conf->node_sz < sizeof(struct rte_rib_node))) {
> > + rte_errno = EINVAL;
> > + return NULL;
> > + }
> > +
> > + if (conf->alloc_type == RTE_RIB_MEMPOOL) {
>
> Since you are forcing the user always to specify the max number of nodes,
> why not always use mempool allocation type? What is the use-case for
> malloc-based allocation instead?
>
Malloc-based allocation was the approach in the first incarnation. As I wrote
earlier, I am considering removing malloc. In performance tests that add/delete a
huge number of routes, malloc is slower.
> > + snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
> > + node_pool = rte_mempool_create(mem_name, conf->max_nodes,
> > + conf->node_sz, 0, 0, NULL, NULL, NULL, NULL,
> > + socket_id, 0);
> > +
> > + if (node_pool == NULL) {
> > + RTE_LOG(ERR, LPM,
> > + "Can not allocate mempool for RIB %s\n",
> name);
> > + rte_errno = ENOMEM;
> > + return NULL;
> > + }
> > +
> > + }
> > +
> > + snprintf(mem_name, sizeof(mem_name), "RIB_%s", name);
> > +
> > + rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
> > +
> > + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
> > + /* guarantee there's no existing */
> > + TAILQ_FOREACH(te, rib_list, next) {
> > + rib = (struct rte_rib *)te->data;
> > + if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
> > + break;
> > + }
> > + rib = NULL;
> > + if (te != NULL) {
> > + rte_errno = EEXIST;
> > + goto exit;
> > + }
> > +
> > + /* allocate tailq entry */
> > + te = rte_zmalloc("RIB_TAILQ_ENTRY", sizeof(*te), 0);
> > + if (te == NULL) {
> > + RTE_LOG(ERR, LPM,
> > + "Can not allocate tailq entry for RIB %s\n", name);
> > + rte_errno = ENOMEM;
> > + goto exit;
> > + }
> > +
> > + /* Allocate memory to store the LPM data structures. */
> > + rib = (struct rte_rib *)rte_zmalloc_socket(mem_name,
> > + sizeof(struct rte_rib), RTE_CACHE_LINE_SIZE, socket_id);
> > + if (rib == NULL) {
> > + RTE_LOG(ERR, LPM, "RIB %s memory allocation failed\n",
> name);
> > + rte_errno = ENOMEM;
> > + goto free_te;
> > + }
> > + snprintf(rib->name, sizeof(rib->name), "%s", name);
> > + rib->trie = NULL;
> > + rib->max_nodes = conf->max_nodes;
> > + rib->node_sz = conf->node_sz;
> > + rib->type = conf->type;
> > + rib->alloc_type = conf->alloc_type;
> > +
> > + if (conf->type <= RTE_RIB_DIR24_8_8B) {
> > + switch (conf->type) {
> > + case RTE_RIB_DIR24_8_1B:
> > + dir24_8_nh_size = RTE_DIR24_8_1B;
> > + rib->lookup = rte_dir24_8_lookup_bulk_1b;
> > + break;
> > + case RTE_RIB_DIR24_8_2B:
> > + dir24_8_nh_size = RTE_DIR24_8_2B;
> > + rib->lookup = rte_dir24_8_lookup_bulk_2b;
> > + break;
> > + case RTE_RIB_DIR24_8_4B:
> > + dir24_8_nh_size = RTE_DIR24_8_4B;
> > + rib->lookup = rte_dir24_8_lookup_bulk_4b;
> > + break;
> > + case RTE_RIB_DIR24_8_8B:
> > + dir24_8_nh_size = RTE_DIR24_8_8B;
> > + rib->lookup = rte_dir24_8_lookup_bulk_8b;
> > + break;
> > + case RTE_RIB_TYPE_MAX:
> > + default:
> > + RTE_LOG(ERR, LPM, "Bad RIB %s type\n", name);
> > + rte_errno = EINVAL;
> > + goto free_rib;
> > + }
> > + rib->fib = (void *)rte_dir24_8_create(name, socket_id,
> > + dir24_8_nh_size, conf->def_nh);
> > + if (rib->fib == NULL) {
> > + RTE_LOG(ERR, LPM, "Failed to allocate FIB %s\n",
> name);
> > + rte_errno = ENOMEM;
> > + goto free_rib;
> > + }
> > + rib->modify = rte_dir24_8_modify;
> > + }
> > +
> > + switch (conf->alloc_type) {
> > + case RTE_RIB_MALLOC:
> > + rib->alloc_node = new_node_malloc;
> > + rib->free_node = free_node_malloc;
> > + break;
> > + case RTE_RIB_MEMPOOL:
> > + rib->node_pool = node_pool;
> > + rib->alloc_node = new_node_mempool;
> > + rib->free_node = free_node_mempool;
> > + break;
> > + case RTE_RIB_ALLOC_MAX:
> > + default:
> > + RTE_LOG(ERR, LPM, "Bad RIB %s alloc type\n", name);
> > + rte_errno = EINVAL;
> > + goto free_fib;
> > + }
> > +
> > + te->data = (void *)rib;
> > + TAILQ_INSERT_TAIL(rib_list, te, next);
> > +
> > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > + return rib;
> > +
> > +free_fib:
> > + switch (conf->type) {
> > + case RTE_RIB_DIR24_8_1B:
> > + case RTE_RIB_DIR24_8_2B:
> > + case RTE_RIB_DIR24_8_4B:
> > + case RTE_RIB_DIR24_8_8B:
> > + rte_dir24_8_free(rib->fib);
> > + break;
> > + default:
> > + break;
> > + }
> > +free_rib:
> > + rte_free(rib);
> > +free_te:
> > + rte_free(te);
> > +exit:
> > + if (conf->alloc_type == RTE_RIB_MEMPOOL)
> > + rte_mempool_free(node_pool);
> > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > + return NULL;
> > +}
> > +
> > +struct rte_rib *
> > +rte_rib_find_existing(const char *name)
> > +{
> > + struct rte_rib *rib = NULL;
> > + struct rte_tailq_entry *te;
> > + struct rte_rib_list *rib_list;
> > +
> > + rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
> > +
> > + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
> > + TAILQ_FOREACH(te, rib_list, next) {
> > + rib = (struct rte_rib *) te->data;
> > + if (strncmp(name, rib->name, RTE_RIB_NAMESIZE) == 0)
> > + break;
> > + }
> > + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > + if (te == NULL) {
> > + rte_errno = ENOENT;
> > + return NULL;
> > + }
> > +
> > + return rib;
> > +}
> > +
> > +void
> > +rte_rib_free(struct rte_rib *rib)
> > +{
> > + struct rte_tailq_entry *te;
> > + struct rte_rib_list *rib_list;
> > + struct rte_rib_node *tmp = NULL;
> > +
> > + if (rib == NULL)
> > + return;
> > +
> > + rib_list = RTE_TAILQ_CAST(rte_rib_tailq.head, rte_rib_list);
> > +
> > + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > + /* find our tailq entry */
> > + TAILQ_FOREACH(te, rib_list, next) {
> > + if (te->data == (void *)rib)
> > + break;
> > + }
> > + if (te != NULL)
> > + TAILQ_REMOVE(rib_list, te, next);
> > +
> > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > + while ((tmp = rte_rib_tree_get_nxt(rib, 0, 0, tmp,
> > + RTE_RIB_GET_NXT_ALL)) != NULL)
> > + rte_rib_tree_remove(rib, tmp->key, tmp->depth);
> > +
> > + if (rib->alloc_type == RTE_RIB_MEMPOOL)
> > + rte_mempool_free(rib->node_pool);
> > +
> > + switch (rib->type) {
> > + case RTE_RIB_DIR24_8_1B:
> > + case RTE_RIB_DIR24_8_2B:
> > + case RTE_RIB_DIR24_8_4B:
> > + case RTE_RIB_DIR24_8_8B:
> > + rte_dir24_8_free(rib->fib);
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + rte_free(rib);
> > + rte_free(te);
> > +}
> > +
> > +int
> > +rte_rib_add(struct rte_rib *rib, uint32_t ip, uint8_t depth, uint64_t
> next_hop)
> > +{
> > + if ((rib == NULL) || (depth > RTE_RIB_MAXDEPTH))
> > + return -EINVAL;
> > +
> > + return rib->modify(rib, ip, depth, next_hop, RTE_RIB_ADD);
> > +}
> > +
> > +int
> > +rte_rib_delete(struct rte_rib *rib, uint32_t ip, uint8_t depth)
> > +{
> > + if ((rib == NULL) || (depth > RTE_RIB_MAXDEPTH))
> > + return -EINVAL;
> > +
> > + return rib->modify(rib, ip, depth, 0, RTE_RIB_DEL);
> > +}
> <snip>
>
--
Regards,
Vladimir
More information about the dev
mailing list