D27401.id80103.diff

Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4174,6 +4174,7 @@
net/route/nhop.c standard
net/route/nhop_ctl.c standard
net/route/nhop_utils.c standard
+net/route/route_algo.c optional route_algo
net/route/route_ctl.c standard
net/route/route_ddb.c optional ddb
net/route/route_helpers.c standard
@@ -4324,6 +4325,7 @@
netinet/in_kdtrace.c optional inet | inet6
netinet/ip_carp.c optional inet carp | inet6 carp
netinet/in_fib.c optional inet
+netinet/in_fib_algo.c optional inet route_algo
netinet/in_gif.c optional gif inet | netgraph_gif inet
netinet/ip_gre.c optional gre inet
netinet/ip_id.c optional inet
@@ -4400,6 +4402,7 @@
netinet6/in6.c optional inet6
netinet6/in6_cksum.c optional inet6
netinet6/in6_fib.c optional inet6
+netinet6/in6_fib_algo.c optional inet6 route_algo
netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6
netinet6/in6_ifattach.c optional inet6
netinet6/in6_jail.c optional inet6
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -454,6 +454,7 @@
PF_DEFAULT_TO_DROP opt_pf.h
RADIX_MPATH opt_mpath.h
ROUTE_MPATH opt_route.h
+ROUTE_ALGO opt_route.h
ROUTETABLES opt_route.h
RSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h
Index: sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
@@ -0,0 +1,480 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <net/vnet.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_fib.h>
+#include <netinet/ip.h>
+
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
+#define RTDEBUG
+
+#include "rte_shim.h"
+#include "rte_lpm.h"
+
+#define LPM_MIN_TBL8 8 /* 2 pages of memory */
+#define LPM_MAX_TBL8	(65536 * 16)	/* 256M */
+
+struct fib_algo_calldata {
+ void *lookup;
+ void *arg;
+};
+
+struct dpdk_lpm_data {
+ struct rte_lpm *lpm;
+ uint32_t number_tbl8s;
+ uint64_t routes_added;
+ uint64_t routes_failed;
+ uint32_t fibnum;
+ uint8_t hit_tables;
+ uint8_t hit_records;
+ struct fib_algo_calldata fa;
+ struct fib_data *fd;
+};
+
+/*
+ * Main datapath routing
+ */
+static struct nhop_object *
+lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ struct rte_lpm *lpm;
+ const struct rte_lpm_external *rte_ext;
+ uint32_t nhidx = 0;
+ int ret;
+
+ lpm = (struct rte_lpm *)algo_data;
+ rte_ext = (const struct rte_lpm_external *)lpm;
+
+ ret = rte_lpm_lookup(lpm, key.addr4.s_addr, &nhidx);
+ if (ret == 0) {
+ /* Success! */
+ return (rte_ext->nh_idx[nhidx]);
+ } else {
+ /* Not found. Check default route */
+ return (rte_ext->nh_idx[rte_ext->default_idx]);
+ }
+
+ return (NULL);
+}
+
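+/*
+ * The framework is expected to invoke this via struct fib_dp as
+ * dp->f(dp->arg, key, scopeid), where dp->arg is the rte_lpm instance
+ * installed by check_dump_success() below.
+ */
+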
+static uint8_t
+rte_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ if (rinfo->num_prefixes < 10)
+ return (1);
+ else if (rinfo->num_prefixes < 1000)
+ return (rinfo->num_prefixes / 10);
+ else if (rinfo->num_prefixes < 500000)
+ return (100 + rinfo->num_prefixes / 3334);
+ else
+ return (250);
+}
+
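+/*
+ * Worked example: a RIB holding 100 prefixes yields a preference of 10,
+ * one holding 650000 prefixes yields the 250 cap; presumably the
+ * framework favours the registered module with the highest preference
+ * for the current RIB size.
+ */
+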
+static int
+contigmask(const uint8_t *p, int len)
+{
+ int i, n;
+
+	for (i = 0; i < len; i++)
+		if ((p[i / 8] & (1 << (7 - (i % 8)))) == 0) /* first bit unset */
+			break;
+	for (n = i + 1; n < len; n++)
+		if ((p[n / 8] & (1 << (7 - (n % 8)))) != 0)
+			return (-1); /* mask not contiguous */
+ return (i);
+}
+
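+/*
+ * Worked example: the mask 255.255.255.0 is the byte array
+ * { 0xff, 0xff, 0xff, 0x00 }; the first unset bit is bit 24 and no set
+ * bit follows it, so contigmask() returns 24. For a non-contiguous mask
+ * such as 255.0.255.0, a set bit appears after the first hole and the
+ * function returns -1.
+ */
+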
+static uint8_t
+rt_get_plen(const struct rtentry *rt)
+{
+ const struct sockaddr *sa;
+ int family;
+ int plen;
+
+ sa = rt_key_const(rt);
+ family = sa->sa_family;
+ sa = rt_mask_const(rt);
+ switch (family) {
+ case AF_INET:
+ if (sa != NULL) {
+ const struct in_addr *addr4;
+ addr4 = &((const struct sockaddr_in *)sa)->sin_addr;
+ plen = contigmask((const uint8_t *)addr4, 32);
+ if (plen == -1)
+ plen = 0;
+ } else
+ plen = 32;
+ break;
+ case AF_INET6:
+ if (sa != NULL) {
+ const struct in6_addr *addr6;
+ addr6 = &((const struct sockaddr_in6 *)sa)->sin6_addr;
+ plen = contigmask((const uint8_t *)addr6, 128);
+ if (plen == -1)
+ plen = 0;
+ } else
+ plen = 128;
+ break;
+ default:
+ plen = 0;
+ }
+
+ return (plen);
+}
+
+static enum flm_op_result
+handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
+{
+ struct rte_lpm_external *rte_ext;
+ rte_ext = (struct rte_lpm_external *)dd->lpm;
+ uint32_t old_nhidx = rte_ext->default_idx;
+
+ if (rc->rc_cmd != RTM_DELETE) {
+ /* Reference new */
+ uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
+
+ if (nhidx == 0)
+ return (FLM_REBUILD);
+ rte_ext->default_idx = nhidx;
+ } else {
+ /* No default route */
+ rte_ext->default_idx = 0;
+ }
+
+ if (old_nhidx != 0)
+ fib_free_nhop_idx(dd->fd, old_nhidx);
+
+ return (FLM_SUCCESS);
+}
+
+static void
+get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx)
+{
+ struct route_nhop_data rnd;
+ struct rtentry *rt;
+
+ rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+ if ((rt != NULL) && (*plen = rt_get_plen(rt)) > 0) {
+ *nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
+ } else {
+ *nhop_idx = 0;
+ *plen = 0;
+ }
+}
+
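+/*
+ * Note: rte_lpm stores prefixes expanded into fixed-size strides, so a
+ * deletion must repoint the entries the removed route covered to its
+ * closest covering ("parent") prefix. get_parent_rule() fetches that
+ * parent from the kernel RIB, where the deletion is assumed to have
+ * already been applied.
+ */
+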
+static enum flm_op_result
+handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
+ const struct in_addr addr, int plen)
+{
+ uint32_t nhidx = 0;
+ int ret;
+ char abuf[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, &addr, abuf, sizeof(abuf));
+
+ /* So we get sin, plen and nhidx */
+ if (rc->rc_cmd != RTM_DELETE) {
+ /*
+ * Addition or change. Save nhop in the internal table
+ * and get index.
+ */
+ nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
+ if (nhidx == 0) {
+ DPRINTF("nhop limit reached, need rebuild");
+ return (FLM_REBUILD);
+ }
+
+ ret = rte_lpm_add(dd->lpm, addr.s_addr, plen, nhidx);
+ DPRINTF("DPDK GU: %s %s/%d nhop %u = %d", (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
+ abuf, plen, nhidx, ret);
+ } else {
+ /*
+ * Need to lookup parent. Assume deletion happened already
+ */
+ const struct sockaddr_in *dst;
+ dst = (const struct sockaddr_in *)rt_key_const(rc->rc_rt);
+
+ uint8_t parent_plen;
+ uint32_t parent_nhop_idx;
+ get_parent_rule(dd, dst->sin_addr, &parent_plen, &parent_nhop_idx);
+
+ ret = rte_lpm_delete(dd->lpm, addr.s_addr, plen, parent_plen, parent_nhop_idx);
+ DPRINTF("DPDK: %s %s/%d nhop %u = %d", "DEL", abuf, plen, nhidx, ret);
+ }
+
+ if (rc->rc_nh_old != NULL)
+ fib_free_nhop(dd->fd, rc->rc_nh_old);
+
+ if (ret != 0) {
+ DPRINTF("error: %d", ret);
+ if (ret == -EOVERFLOW)
+ return (FLM_REBUILD);
+ return (FLM_ERROR);
+ }
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+ const struct sockaddr_in *sin;
+ int plen = rt_get_plen(rc->rc_rt);
+ enum flm_op_result ret;
+ struct dpdk_lpm_data *dd;
+
+ dd = (struct dpdk_lpm_data *)_data;
+ sin = (const struct sockaddr_in *)rt_key_const(rc->rc_rt);
+
+ if (plen != 0)
+ ret = handle_gu_change(dd, rc, sin->sin_addr, plen);
+ else
+ ret = handle_default_change(dd, rc);
+
+ if (ret != 0)
+ DPRINTF("error handling route");
+ return (ret);
+}
+
+static void
+destroy_table(void *_data)
+{
+ struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
+
+ DPRINTF("destroy dd %p", dd);
+ if (dd->lpm != NULL)
+ rte_lpm_free(dd->lpm);
+ free(dd, M_TEMP);
+}
+
+static enum flm_op_result
+add_route_cb(struct rtentry *rt, void *_data)
+{
+ struct dpdk_lpm_data *dd = (struct dpdk_lpm_data *)_data;
+ const struct sockaddr_in *sin;
+ int plen = rt_get_plen(rt);
+ int ret;
+
+ sin = (const struct sockaddr_in *)rt_key_const(rt);
+
+ char abuf[INET_ADDRSTRLEN];
+ char mbuf[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, &sin->sin_addr, abuf, sizeof(abuf));
+
+ const struct sockaddr_in *mask;
+ mask = (const struct sockaddr_in *)rt_mask_const(rt);
+ if (mask != NULL) {
+ inet_ntop(AF_INET, &mask->sin_addr, mbuf, sizeof(mbuf));
+ } else
+ mbuf[0] = '\0';
+
+ DPRINTF("Operating on %s/%d [%s]", abuf, plen, mbuf);
+
+ if (plen == 0) {
+ struct rib_cmd_info rc;
+
+ bzero(&rc, sizeof(rc));
+ rc.rc_cmd = RTM_ADD;
+ rc.rc_nh_new = rt->rt_nhop;
+ DPRINTF("Adding default route");
+ return (handle_default_change(dd, &rc));
+ }
+
+ uint32_t nhidx = fib_get_nhop_idx(dd->fd, rt->rt_nhop);
+ if (nhidx == 0) {
+ DPRINTF("unable to get nhop index");
+ return (FLM_REBUILD);
+ }
+ ret = rte_lpm_add(dd->lpm, sin->sin_addr.s_addr, plen, nhidx);
+ DPRINTF("ADD %p %s/%d nh %u = %d", dd->lpm, abuf, plen, nhidx, ret);
+
+ if (ret != 0) {
+ DPRINTF("rte_lpm_add() returned %d", ret);
+ if (ret == -ENOSPC) {
+ dd->hit_tables = 1;
+ return (FLM_REBUILD);
+ }
+ dd->routes_failed++;
+ } else
+ dd->routes_added++;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+check_dump_success(void *_data, struct fib_dp *dp)
+{
+ struct dpdk_lpm_data *dd;
+
+ dd = (struct dpdk_lpm_data *)_data;
+
+ DPRINTF("scan completed. added: %zu failed: %zu",
+ dd->routes_added, dd->routes_failed);
+ if (dd->hit_tables || dd->routes_failed > 0)
+ return (FLM_REBUILD);
+
+ DPRINTF("DPDK lookup engine synced with IPv4 RIB id %u, %zu routes",
+ dd->fibnum, dd->routes_added);
+
+ dp->f = lookup_ptr;
+	dp->arg = dd->lpm;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+estimate_scale(const struct dpdk_lpm_data *dd_src, struct dpdk_lpm_data *dd)
+{
+
+ /* XXX: update at 75% capacity */
+ if (dd_src->hit_tables)
+ dd->number_tbl8s = dd_src->number_tbl8s * 2;
+ else
+ dd->number_tbl8s = dd_src->number_tbl8s;
+
+ /* TODO: look into the appropriate RIB to adjust */
+}
+
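+/*
+ * Worked example: if the previous table ran out of tbl8s while sized at
+ * LPM_MIN_TBL8 (8), the rebuilt table is created with 16 tbl8s;
+ * otherwise the previous size is kept unchanged.
+ */
+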
+static struct dpdk_lpm_data *
+build_table(struct dpdk_lpm_data *dd_prev)
+{
+ struct dpdk_lpm_data *dd;
+ struct rte_lpm *lpm;
+
+ dd = malloc(sizeof(struct dpdk_lpm_data), M_TEMP, M_NOWAIT | M_ZERO);
+ if (dd == NULL) {
+ DPRINTF("Unable to allocate base datastructure");
+ return (NULL);
+ }
+ dd->fibnum = dd_prev->fibnum;
+ dd->fd = dd_prev->fd;
+
+ estimate_scale(dd_prev, dd);
+
+ struct rte_lpm_config cfg = {.number_tbl8s = dd->number_tbl8s};
+ lpm = rte_lpm_create("test", 0, &cfg);
+ if (lpm == NULL) {
+ DPRINTF("unable to create lpm");
+ free(dd, M_TEMP);
+ return (NULL);
+ }
+ dd->lpm = lpm;
+ struct rte_lpm_external *ext = (struct rte_lpm_external *)lpm;
+ ext->nh_idx = fib_get_nhop_array(dd->fd);
+
+ DPRINTF("allocated %u tbl8s", dd->number_tbl8s);
+
+ return (dd);
+}
+
+static enum flm_op_result
+init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
+{
+ struct dpdk_lpm_data *dd, dd_base;
+
+ if (_old_data == NULL) {
+ bzero(&dd_base, sizeof(struct dpdk_lpm_data));
+ dd_base.fibnum = fibnum;
+ dd_base.fd = fd;
+ /* TODO: get rib statistics */
+ dd_base.number_tbl8s = LPM_MIN_TBL8;
+ dd = &dd_base;
+ } else {
+ DPRINTF("Starting with old data");
+ dd = (struct dpdk_lpm_data *)_old_data;
+ }
+
+ /* Guaranteed to be in epoch */
+ dd = build_table(dd);
+ if (dd == NULL) {
+ DPRINTF("table creation failed");
+ return (FLM_REBUILD);
+ }
+
+ *data = dd;
+ return (FLM_SUCCESS);
+}
+
+static struct fib_lookup_module dpdk_lpm4 = {
+ .flm_name = "dpdk_lpm4",
+ .flm_family = AF_INET,
+ .flm_init_cb = init_table,
+ .flm_destroy_cb = destroy_table,
+ .flm_dump_rib_item_cb = add_route_cb,
+ .flm_dump_end_cb = check_dump_success,
+ .flm_change_rib_item_cb = handle_rtable_change_cb,
+ .flm_get_pref = rte_get_pref,
+};
+
+static int
+lpm4_modevent(module_t mod, int type, void *unused)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ fib_module_register(&dpdk_lpm4);
+ break;
+ case MOD_UNLOAD:
+ error = fib_module_unregister(&dpdk_lpm4);
+ break;
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+ return (error);
+}
+
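+/*
+ * Standard module(9) glue: MOD_LOAD registers the algorithm with the
+ * route_algo framework, making it selectable for IPv4 fibs; MOD_UNLOAD
+ * is presumably rejected while a fib still uses the module, hence
+ * fib_module_unregister() may return an error.
+ */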
+static moduledata_t lpm4mod = {
+ "dpdk_lpm4",
+ lpm4_modevent,
+ 0
+};
+
+DECLARE_MODULE(lpm4mod, lpm4mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(lpm4mod, 1);
Index: sys/contrib/dpdk_rte_lpm/dpdk_lpm6.h
===================================================================
--- sys/contrib/dpdk_rte_lpm/dpdk_lpm6.h
+++ sys/contrib/dpdk_rte_lpm/dpdk_lpm6.h
@@ -1,6 +1,7 @@
/*-
- * Copyright (c) 2015
- * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,14 +11,11 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -29,17 +27,31 @@
* $FreeBSD$
*/
-#ifndef _NETINET6_IN6_FIB_H_
-#define _NETINET6_IN6_FIB_H_
+/*
+ * Interface definitions for the kernel port of the DPDK rte_lpm6 library.
+ */
+
+#ifndef _NETINET6_DPDK_LPM6_H_
+#define _NETINET6_DPDK_LPM6_H_
+
+/** LPM structure. */
+struct rte_lpm6;
+
+/** LPM configuration structure. */
+struct rte_lpm6_config {
+ uint32_t max_rules; /**< Max number of rules. */
+ uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */
+ int flags; /**< This field is currently unused. */
+};
+
+struct rte_lpm6 *
+rte_lpm6_create(const char *name, int socket_id,
+ const struct rte_lpm6_config *config);
+void
+rte_lpm6_free(struct rte_lpm6 *lpm);
+int
+rte_lpm6_add(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ uint32_t next_hop, int is_new_rule);
-struct nhop_object *fib6_lookup(uint32_t fibnum,
- const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
- uint32_t flowid);
-int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
- uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
-struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum,
- const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags);
-uint32_t fib6_calc_software_hash(const struct in6_addr *src,
- const struct in6_addr *dst, unsigned short src_port, unsigned short dst_port,
- char proto, uint32_t *phashtype);
#endif
Index: sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
@@ -0,0 +1,560 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <net/vnet.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_fib.h>
+
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
+#define RTDEBUG
+
+#include "rte_lpm6.h"
+
+#define LPM6_MIN_TBL8 8 /* 2 pages of memory */
+#define LPM6_MAX_TBL8	(65536 * 16)	/* 256M */
+
+struct fib_algo_calldata {
+ void *lookup;
+ void *arg;
+};
+
+struct dpdk_lpm6_data {
+ struct rte_lpm6 *lpm6;
+ uint32_t number_tbl8s;
+ uint64_t routes_added;
+ uint64_t routes_failed;
+ uint32_t fibnum;
+ uint8_t hit_tables;
+ struct fib_data *fd;
+};
+
+static struct nhop_object *
+lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6,
+ uint32_t scopeid)
+{
+ const struct rte_lpm6_external *rte_ext;
+ struct nhop_object *nh = NULL;
+ struct sockaddr_in6 sin6;
+ struct rib_head *rh;
+ struct radix_node *rn;
+ RIB_RLOCK_TRACKER;
+
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = *dst6;
+ /* Assume scopeid is valid and embed it directly */
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ rte_ext = (const struct rte_lpm6_external *)lpm6;
+ rh = rt_tables_get_rnh(rte_ext->fibnum, AF_INET6);
+ if (rh == NULL)
+ return (NULL);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
+ nh = RNTORT(rn)->rt_nhop;
+ RIB_RUNLOCK(rh);
+ //TODO: check LL nhops refcounting
+
+ return (nh);
+}
+
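+/*
+ * Example: a link-local lookup for fe80::1 with scopeid 2 rewrites the
+ * address into the KAME-style embedded form fe80:2::1 before consulting
+ * the kernel radix tree, which stores link-local routes that way
+ * internally.
+ */
+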
+/*
+ * Main datapath routing
+ */
+static struct nhop_object *
+lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ const struct rte_lpm6 *lpm6;
+ const struct rte_lpm6_external *rte_ext;
+ const struct in6_addr *addr6;
+ uint32_t nhidx = 0;
+ int ret;
+
+ lpm6 = (const struct rte_lpm6 *)algo_data;
+ addr6 = key.addr6;
+ rte_ext = (const struct rte_lpm6_external *)lpm6;
+
+ if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) {
+ ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx);
+ if (ret == 0) {
+ /* Success! */
+ return (rte_ext->nh_idx[nhidx]);
+ } else {
+ /* Not found. Check default route */
+ if (rte_ext->default_idx > 0)
+ return (rte_ext->nh_idx[rte_ext->default_idx]);
+ else
+ return (NULL);
+ }
+ } else {
+ /* LL */
+ return (lookup_ptr_ll(lpm6, addr6, scopeid));
+ }
+}
+
+static uint8_t
+rte6_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ if (rinfo->num_prefixes < 10)
+ return (1);
+ else if (rinfo->num_prefixes < 1000)
+ return (rinfo->num_prefixes / 10);
+ else if (rinfo->num_prefixes < 500000)
+ return (100 + rinfo->num_prefixes / 3334);
+ else
+ return (250);
+}
+
+static int
+contigmask(const uint8_t *p, int len)
+{
+ int i, n;
+
+	for (i = 0; i < len; i++)
+		if ((p[i / 8] & (1 << (7 - (i % 8)))) == 0) /* first bit unset */
+			break;
+	for (n = i + 1; n < len; n++)
+		if ((p[n / 8] & (1 << (7 - (n % 8)))) != 0)
+			return (-1); /* mask not contiguous */
+ return (i);
+}
+
+static uint8_t
+rt_get_plen(const struct rtentry *rt)
+{
+ const struct sockaddr *sa;
+ int family;
+ int plen;
+
+ sa = rt_key_const(rt);
+ family = sa->sa_family;
+ sa = rt_mask_const(rt);
+ switch (family) {
+ case AF_INET:
+ if (sa != NULL) {
+ const struct in_addr *addr4;
+ addr4 = &((const struct sockaddr_in *)sa)->sin_addr;
+ plen = contigmask((const uint8_t *)addr4, 32);
+ if (plen == -1)
+ plen = 0;
+ } else
+ plen = 32;
+ break;
+ case AF_INET6:
+ if (sa != NULL) {
+ const struct in6_addr *addr6;
+ addr6 = &((const struct sockaddr_in6 *)sa)->sin6_addr;
+ plen = contigmask((const uint8_t *)addr6, 128);
+ if (plen == -1)
+ plen = 0;
+ } else
+ plen = 128;
+ break;
+ default:
+ plen = 0;
+ }
+
+ return (plen);
+}
+
+static enum flm_op_result
+handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
+{
+ struct rte_lpm6_external *rte_ext;
+ rte_ext = (struct rte_lpm6_external *)dd->lpm6;
+ uint32_t old_nhidx = rte_ext->default_idx;
+
+ if (rc->rc_cmd != RTM_DELETE) {
+ /* Reference new */
+ uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
+
+ if (nhidx == 0)
+ return (FLM_REBUILD);
+ rte_ext->default_idx = nhidx;
+ } else {
+ /* No default route */
+ rte_ext->default_idx = 0;
+ }
+
+ if (old_nhidx != 0)
+ fib_free_nhop_idx(dd->fd, old_nhidx);
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
+ const struct sockaddr_in6 *sin6, int plen)
+{
+
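+	/*
+	 * Link-local prefixes are not inserted into the LPM table; the
+	 * datapath routes them through lookup_ptr_ll() and the kernel
+	 * radix tree, so presumably no state needs updating here.
+	 */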
+ return (FLM_SUCCESS);
+}
+
+static struct rte_lpm6_rule *
+pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
+ char *buffer)
+{
+ struct rte_lpm6_rule *lsp_rule = NULL;
+ struct route_nhop_data rnd;
+ struct rtentry *rt;
+ int plen;
+
+ rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
+ /* plen = 0 means default route and it's out of scope */
+ if ((rt != NULL) && (plen = rt_get_plen(rt)) > 0) {
+ uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
+ if (nhidx == 0) {
+ /*
+ * shouldn't happen as we already have parent route.
+ * It will trigger rebuild automatically.
+ */
+ return (NULL);
+ }
+ const struct sockaddr_in6 *psin6;
+ const uint8_t *uaddr6;
+ psin6 = (const struct sockaddr_in6 *)rt_key_const(rt);
+ uaddr6 = (const uint8_t *)&psin6->sin6_addr;
+ lsp_rule = fill_rule6(buffer, uaddr6, plen, nhidx);
+ }
+
+ return (lsp_rule);
+}
+
+static enum flm_op_result
+handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
+ const struct in6_addr *addr6, int plen)
+{
+ uint32_t nhidx = 0;
+ int ret;
+ char abuf[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf));
+
+ /* So we get sin6, plen and nhidx */
+ if (rc->rc_cmd != RTM_DELETE) {
+ /*
+ * Addition or change. Save nhop in the internal table
+ * and get index.
+ */
+ nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new);
+ if (nhidx == 0) {
+			DPRINTF("nhop limit reached, need rebuild");
+ return (FLM_REBUILD);
+ }
+
+ ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
+ plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
+ DPRINTF("DPDK GU: %s %s/%d nhop %u = %d", (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
+ abuf, plen, nhidx, ret);
+ } else {
+ /*
+ * Need to lookup parent. Assume deletion happened already
+ */
+ char buffer[RTE_LPM6_RULE_SIZE];
+ struct rte_lpm6_rule *lsp_rule = NULL;
+ const struct sockaddr_in6 *dst6;
+ dst6 = (const struct sockaddr_in6 *)rt_key_const(rc->rc_rt);
+
+ lsp_rule = pack_parent_rule(dd, &dst6->sin6_addr, buffer);
+
+ ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
+ DPRINTF("DPDK GU: %s %s/%d nhop %u = %d", "DEL", abuf, plen, nhidx, ret);
+ }
+
+ if (rc->rc_nh_old != NULL)
+ fib_free_nhop(dd->fd, rc->rc_nh_old);
+
+ if (ret != 0) {
+ DPRINTF("error: %d", ret);
+ if (ret == -EOVERFLOW)
+ return (FLM_REBUILD);
+ return (FLM_ERROR);
+ }
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc)
+{
+ const struct sockaddr_in6 *sin6;
+ int plen = rt_get_plen(rc->rc_rt);
+ enum flm_op_result ret;
+
+ sin6 = (const struct sockaddr_in6 *)rt_key_const(rc->rc_rt);
+
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+ ret = handle_ll_change(dd, rc, sin6, plen);
+ else if (plen == 0)
+ ret = handle_default_change(dd, rc);
+ else
+ ret = handle_gu_change(dd, rc, &sin6->sin6_addr, plen);
+
+ if (ret != 0)
+ DPRINTF("error handling route");
+ return (ret);
+}
+
+static enum flm_op_result
+handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+ struct dpdk_lpm6_data *dd;
+
+ dd = (struct dpdk_lpm6_data *)_data;
+
+ return (handle_any_change(dd, rc));
+}
+
+static void
+destroy_dd(struct dpdk_lpm6_data *dd)
+{
+
+ DPRINTF("destroy dd %p", dd);
+ if (dd->lpm6 != NULL)
+ rte_lpm6_free(dd->lpm6);
+ free(dd, M_TEMP);
+}
+
+static void
+destroy_table(void *_data)
+{
+
+ destroy_dd((struct dpdk_lpm6_data *)_data);
+}
+
+static enum flm_op_result
+add_route_cb(struct rtentry *rt, void *_data)
+{
+ struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;
+ const struct sockaddr_in6 *sin6;
+ int plen = rt_get_plen(rt);
+ int ret;
+
+ sin6 = (const struct sockaddr_in6 *)rt_key_const(rt);
+
+ char abuf[INET6_ADDRSTRLEN];
+ char mbuf[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, &sin6->sin6_addr, abuf, sizeof(abuf));
+
+ const struct sockaddr_in6 *mask6;
+ mask6 = (const struct sockaddr_in6 *)rt_mask_const(rt);
+ if (mask6 != NULL) {
+		inet_ntop(AF_INET6, &mask6->sin6_addr, mbuf, sizeof(mbuf));
+ } else
+ mbuf[0] = '\0';
+
+ DPRINTF("Operating on %s/%d [%s]", abuf, plen, mbuf);
+
+ if (plen == 0) {
+ struct rib_cmd_info rc;
+
+ bzero(&rc, sizeof(rc));
+ rc.rc_cmd = RTM_ADD;
+ rc.rc_nh_new = rt->rt_nhop;
+ DPRINTF("Adding default route");
+ return (handle_default_change(dd, &rc));
+ }
+
+ uint32_t nhidx = fib_get_nhop_idx(dd->fd, rt->rt_nhop);
+ if (nhidx == 0) {
+ DPRINTF("unable to get nhop index");
+ return (FLM_REBUILD);
+ }
+ ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&sin6->sin6_addr, plen,
+ nhidx, 1);
+ DPRINTF("ADD %p %s/%d nh %u = %d", dd->lpm6, abuf, plen, nhidx, ret);
+
+ if (ret != 0) {
+ DPRINTF("rte_lpm6_add() returned %d", ret);
+ if (ret == -ENOSPC) {
+ dd->hit_tables = 1;
+ return (FLM_REBUILD);
+ }
+ dd->routes_failed++;
+ } else
+ dd->routes_added++;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+check_dump_success(void *_data, struct fib_dp *dp)
+{
+ struct dpdk_lpm6_data *dd;
+
+ dd = (struct dpdk_lpm6_data *)_data;
+
+ DPRINTF("scan completed. added: %zu failed: %zu",
+ dd->routes_added, dd->routes_failed);
+ if (dd->hit_tables || dd->routes_failed > 0)
+ return (FLM_REBUILD);
+
+ DPRINTF("DPDK lookup engine synced with IPv6 RIB id %u, %zu routes",
+ dd->fibnum, dd->routes_added);
+
+ dp->f = lookup_ptr;
+ dp->arg = dd->lpm6;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd)
+{
+
+ /* XXX: update at 75% capacity */
+ if (dd_src->hit_tables)
+ dd->number_tbl8s = dd_src->number_tbl8s * 2;
+ else
+ dd->number_tbl8s = dd_src->number_tbl8s;
+
+ /* TODO: look into the appropriate RIB to adjust */
+}
+
+static struct dpdk_lpm6_data *
+build_table(struct dpdk_lpm6_data *dd_prev)
+{
+ struct dpdk_lpm6_data *dd;
+ struct rte_lpm6 *lpm6;
+
+ dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO);
+ if (dd == NULL) {
+ DPRINTF("Unable to allocate base datastructure");
+ return (NULL);
+ }
+ dd->fibnum = dd_prev->fibnum;
+ dd->fd = dd_prev->fd;
+
+ estimate_scale(dd_prev, dd);
+
+ struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s};
+ lpm6 = rte_lpm6_create("test", 0, &cfg);
+ if (lpm6 == NULL) {
+ DPRINTF("unable to create lpm6");
+ free(dd, M_TEMP);
+ return (NULL);
+ }
+ dd->lpm6 = lpm6;
+ struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6;
+ ext->nh_idx = fib_get_nhop_array(dd->fd);
+
+ DPRINTF("allocated %u tbl8s", dd->number_tbl8s);
+
+ return (dd);
+}
+
+static enum flm_op_result
+init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data)
+{
+ struct dpdk_lpm6_data *dd, dd_base;
+
+ if (_old_data == NULL) {
+ bzero(&dd_base, sizeof(struct dpdk_lpm6_data));
+ dd_base.fibnum = fibnum;
+ dd_base.fd = fd;
+ /* TODO: get rib statistics */
+ dd_base.number_tbl8s = LPM6_MIN_TBL8;
+ dd = &dd_base;
+ } else {
+ DPRINTF("Starting with old data");
+ dd = (struct dpdk_lpm6_data *)_old_data;
+ }
+
+ /* Guaranteed to be in epoch */
+ dd = build_table(dd);
+ if (dd == NULL) {
+ DPRINTF("table creation failed");
+ return (FLM_REBUILD);
+ }
+
+ *data = dd;
+ return (FLM_SUCCESS);
+}
+
+static struct fib_lookup_module dpdk_lpm6 = {
+ .flm_name = "dpdk_lpm6",
+ .flm_family = AF_INET6,
+ .flm_init_cb = init_table,
+ .flm_destroy_cb = destroy_table,
+ .flm_dump_rib_item_cb = add_route_cb,
+ .flm_dump_end_cb = check_dump_success,
+ .flm_change_rib_item_cb = handle_rtable_change_cb,
+ .flm_get_pref = rte6_get_pref,
+};
+
+static int
+lpm6_modevent(module_t mod, int type, void *unused)
+{
+ int error = 0;
+
+ switch (type) {
+ case MOD_LOAD:
+ fib_module_register(&dpdk_lpm6);
+ break;
+ case MOD_UNLOAD:
+ error = fib_module_unregister(&dpdk_lpm6);
+ break;
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+ return (error);
+}
+
+static moduledata_t lpm6mod = {
+ "dpdk_lpm6",
+ lpm6_modevent,
+ 0
+};
+
+DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(lpm6mod, 1);
Index: sys/contrib/dpdk_rte_lpm/rte_branch_prediction.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_branch_prediction.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+/**
+ * @file
+ * Branch Prediction Helpers in RTE
+ */
+
+#ifndef _RTE_BRANCH_PREDICTION_H_
+#define _RTE_BRANCH_PREDICTION_H_
+
+/**
+ * Check if a branch is likely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * likely to be taken. Example:
+ *
+ * if (likely(x > 1))
+ * do_stuff();
+ *
+ */
+#ifndef likely
+#define likely(x) __builtin_expect(!!(x), 1)
+#endif /* likely */
+
+/**
+ * Check if a branch is unlikely to be taken.
+ *
+ * This compiler builtin allows the developer to indicate if a branch is
+ * unlikely to be taken. Example:
+ *
+ * if (unlikely(x < 1))
+ * do_stuff();
+ *
+ */
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif /* unlikely */
+
+#endif /* _RTE_BRANCH_PREDICTION_H_ */
Index: sys/contrib/dpdk_rte_lpm/rte_common.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_common.h
@@ -0,0 +1,838 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2019 Intel Corporation
+ */
+
+#ifndef _RTE_COMMON_H_
+#define _RTE_COMMON_H_
+
+/**
+ * @file
+ *
+ * Generic, commonly-used macro and inline function definitions
+ * for DPDK.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#include <rte_config.h>
+
+/* OS specific include */
+//#include <rte_os.h>
+
+#ifndef typeof
+#define typeof __typeof__
+#endif
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+/** C extension macro for environments lacking C11 features. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
+#define RTE_STD_C11 __extension__
+#else
+#define RTE_STD_C11
+#endif
+
+/*
+ * RTE_TOOLCHAIN_GCC is defined if the target is built with GCC,
+ * while a host application (like pmdinfogen) may have another compiler.
+ * RTE_CC_IS_GNU is true if the file is compiled with GCC,
+ * no matter whether it is a target or a host application.
+ */
+#define RTE_CC_IS_GNU 0
+#if defined __clang__
+#define RTE_CC_CLANG
+#elif defined __INTEL_COMPILER
+#define RTE_CC_ICC
+#elif defined __GNUC__
+#define RTE_CC_GCC
+#undef RTE_CC_IS_GNU
+#define RTE_CC_IS_GNU 1
+#endif
+#if RTE_CC_IS_GNU
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + \
+ __GNUC_PATCHLEVEL__)
+#endif
+
+/**
+ * Force alignment
+ */
+#define __rte_aligned(a) __attribute__((__aligned__(a)))
+
+#ifdef RTE_ARCH_STRICT_ALIGN
+typedef uint64_t unaligned_uint64_t __rte_aligned(1);
+typedef uint32_t unaligned_uint32_t __rte_aligned(1);
+typedef uint16_t unaligned_uint16_t __rte_aligned(1);
+#else
+typedef uint64_t unaligned_uint64_t;
+typedef uint32_t unaligned_uint32_t;
+typedef uint16_t unaligned_uint16_t;
+#endif
+
+/**
+ * Force a structure to be packed
+ */
+#define __rte_packed __attribute__((__packed__))
+
+/******* Macro to mark functions and fields scheduled for removal *****/
+#define __rte_deprecated __attribute__((__deprecated__))
+
+/**
+ * Mark a function or variable to a weak reference.
+ */
+#define __rte_weak __attribute__((__weak__))
+
+/**
+ * Force symbol to be generated even if it appears to be unused.
+ */
+#define __rte_used __attribute__((used))
+
+/*********** Macros to eliminate unused variable warnings ********/
+
+/**
+ * short definition to mark a function parameter unused
+ */
+#define __rte_unused __attribute__((__unused__))
+
+/**
+ * definition to mark a variable or function parameter as used so
+ * as to avoid a compiler warning
+ */
+#define RTE_SET_USED(x) (void)(x)
+
+/**
+ * Check format string and its arguments at compile-time.
+ *
+ * GCC on Windows assumes MS-specific format string by default,
+ * even if the underlying stdio implementation is ANSI-compliant,
+ * so this must be overridden.
+ */
+#if RTE_CC_IS_GNU
+#define __rte_format_printf(format_index, first_arg) \
+ __attribute__((format(gnu_printf, format_index, first_arg)))
+#else
+#define __rte_format_printf(format_index, first_arg) \
+ __attribute__((format(printf, format_index, first_arg)))
+#endif
+
+#define RTE_PRIORITY_LOG 101
+#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
+#define RTE_PRIORITY_LAST 65535
+
+#define RTE_PRIO(prio) \
+ RTE_PRIORITY_ ## prio
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#ifndef RTE_INIT_PRIO /* Allow to override from EAL */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
+#endif
+
+/**
+ * Run function before main() with low priority.
+ *
+ * The constructor will be run after prioritized constructors.
+ *
+ * @param func
+ * Constructor function.
+ */
+#define RTE_INIT(func) \
+ RTE_INIT_PRIO(func, LAST)
+
+/**
+ * Run after main() with low priority.
+ *
+ * @param func
+ * Destructor function name.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the last to run.
+ */
+#ifndef RTE_FINI_PRIO /* Allow to override from EAL */
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+#endif
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
+
+/**
+ * Hint never returning function
+ */
+#define __rte_noreturn __attribute__((noreturn))
+
+/**
+ * Force a function to be inlined
+ */
+#define __rte_always_inline inline __attribute__((always_inline))
+
+/**
+ * Force a function to be noinlined
+ */
+#define __rte_noinline __attribute__((noinline))
+
+/**
+ * Hint function in the hot path
+ */
+#define __rte_hot __attribute__((hot))
+
+/**
+ * Hint function in the cold path
+ */
+#define __rte_cold __attribute__((cold))
+
+/*********** Macros for pointer arithmetic ********/
+
+/**
+ * add a byte-value offset to a pointer
+ */
+#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
+
+/**
+ * subtract a byte-value offset from a pointer
+ */
+#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x)))
+
+/**
+ * get the difference between two pointer values, i.e. how far apart
+ * in bytes the locations they point to are. It is assumed that
+ * ptr1 is greater than ptr2.
+ */
+#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
+
+/**
+ * Workaround to cast a const field of a structure to non-const type.
+ */
+#define RTE_CAST_FIELD(var, field, type) \
+ (*(type *)((uintptr_t)(var) + offsetof(typeof(*(var)), field)))
+
+/*********** Macros/static functions for doing alignment ********/
+
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no higher than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_FLOOR(ptr, align) \
+ ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align))
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no
+ * bigger than the first parameter. Second parameter must be a
+ * power-of-two value.
+ */
+#define RTE_ALIGN_FLOOR(val, align) \
+ (typeof(val))((val) & (~((typeof(val))((align) - 1))))
+
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ */
+#define RTE_PTR_ALIGN_CEIL(ptr, align) \
+ RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant value
+ * will be of the same type as the first parameter, and will be no lower
+ * than the first parameter. Second parameter must be a power-of-two
+ * value.
+ */
+#define RTE_ALIGN_CEIL(val, align) \
+ RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align)
+
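+/*
+ * Worked examples: RTE_ALIGN_FLOOR(13, 8) == 8 and
+ * RTE_ALIGN_CEIL(13, 8) == 16; an already-aligned value is returned
+ * unchanged, e.g. RTE_ALIGN_CEIL(16, 8) == 16.
+ */
+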
+/**
+ * Macro to align a pointer to a given power-of-two. The resultant
+ * pointer will be a pointer of the same type as the first parameter, and
+ * point to an address no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_PTR_ALIGN_CEIL
+ */
+#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align)
+
+/**
+ * Macro to align a value to a given power-of-two. The resultant
+ * value will be of the same type as the first parameter, and
+ * will be no lower than the first parameter. Second parameter
+ * must be a power-of-two value.
+ * This function is the same as RTE_ALIGN_CEIL
+ */
+#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
+
+/**
+ * Macro to align a value to the multiple of given value. The resultant
+ * value will be of the same type as the first parameter and will be no lower
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_CEIL(v, mul) \
+ (((v + (typeof(v))(mul) - 1) / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align a value to the multiple of given value. The resultant
+ * value will be of the same type as the first parameter and will be no higher
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_FLOOR(v, mul) \
+ ((v / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align value to the nearest multiple of the given value.
+ * The resultant value might be greater than or less than the first parameter
+ * whichever difference is the lowest.
+ */
+#define RTE_ALIGN_MUL_NEAR(v, mul) \
+ ({ \
+ typeof(v) ceil = RTE_ALIGN_MUL_CEIL(v, mul); \
+ typeof(v) floor = RTE_ALIGN_MUL_FLOOR(v, mul); \
+ (ceil - v) > (v - floor) ? floor : ceil; \
+ })
+
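+/*
+ * Worked examples: RTE_ALIGN_MUL_NEAR(11, 5) == 10 (floor is closer),
+ * RTE_ALIGN_MUL_NEAR(13, 5) == 15 (ceiling is closer), and an exact tie
+ * resolves to the ceiling, e.g. RTE_ALIGN_MUL_NEAR(10, 4) == 12.
+ */
+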
+/**
+ * Checks if a pointer is aligned to a given power-of-two value
+ *
+ * @param ptr
+ * The pointer whose alignment is to be checked
+ * @param align
+ * The power-of-two value to which the ptr should be aligned
+ *
+ * @return
+ * True(1) where the pointer is correctly aligned, false(0) otherwise
+ */
+static inline int
+rte_is_aligned(void *ptr, unsigned align)
+{
+ return RTE_PTR_ALIGN(ptr, align) == ptr;
+}
+
+/*********** Macros for compile type checks ********/
+
+/**
+ * Triggers an error at compilation time if the condition is true.
+ */
+#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
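+/*
+ * Example: RTE_BUILD_BUG_ON(sizeof(struct in_addr) != 4) compiles away
+ * when the condition is false; when it is true, the char array is given
+ * a negative size and compilation fails.
+ */
+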
+/*********** Cache line related macros ********/
+
+/** Cache line mask. */
+#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1)
+
+/** Return the first cache-aligned value greater or equal to size. */
+#define RTE_CACHE_LINE_ROUNDUP(size) \
+ (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / \
+ RTE_CACHE_LINE_SIZE))
+
+/** Cache line size in terms of log2 */
+#if RTE_CACHE_LINE_SIZE == 64
+#define RTE_CACHE_LINE_SIZE_LOG2 6
+#elif RTE_CACHE_LINE_SIZE == 128
+#define RTE_CACHE_LINE_SIZE_LOG2 7
+#else
+#error "Unsupported cache line size"
+#endif
+
+/** Minimum Cache line size. */
+#define RTE_CACHE_LINE_MIN_SIZE 64
+
+/** Force alignment to cache line. */
+#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
+
+/** Force minimum cache line alignment. */
+#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
+
+/*********** PA/IOVA type definitions ********/
+
+/** Physical address */
+typedef uint64_t phys_addr_t;
+#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
+
+/*********** Structure alignment markers ********/
+
+/** Generic marker for any place in a structure. */
+__extension__ typedef void *RTE_MARKER[0];
+/** Marker for 1B alignment in a structure. */
+__extension__ typedef uint8_t RTE_MARKER8[0];
+/** Marker for 2B alignment in a structure. */
+__extension__ typedef uint16_t RTE_MARKER16[0];
+/** Marker for 4B alignment in a structure. */
+__extension__ typedef uint32_t RTE_MARKER32[0];
+/** Marker for 8B alignment in a structure. */
+__extension__ typedef uint64_t RTE_MARKER64[0];
+
+/**
+ * Propagates the most significant set bit of the 32-bit input into
+ * all lower bit positions, constructing a value with the same MSB as x
+ * but all 1's under it.
+ *
+ * @param x
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint32_t
+rte_combine32ms1b(register uint32_t x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+
+ return x;
+}
+
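+/*
+ * Worked example: rte_combine32ms1b(0x00100000) == 0x001fffff; every
+ * bit at or below the most significant set bit ends up set.
+ */
+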
+/**
+ * Propagates the most significant set bit of the 64-bit input into
+ * all lower bit positions, constructing a value with the same MSB as v
+ * but all 1's under it.
+ *
+ * @param v
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint64_t
+rte_combine64ms1b(register uint64_t v)
+{
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+
+ return v;
+}
+
+/*********** Macros to work with powers of 2 ********/
+
+/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
+ * Returns true if n is a power of 2
+ * @param n
+ * Number to check
+ * @return 1 if true, 0 otherwise
+ */
+static inline int
+rte_is_power_of_2(uint32_t n)
+{
+ return n && !(n & (n - 1));
+}
+
+/**
+ * Aligns input parameter to the next power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint32_t
+rte_align32pow2(uint32_t x)
+{
+ x--;
+ x = rte_combine32ms1b(x);
+
+ return x + 1;
+}
+
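+/*
+ * Worked examples: rte_align32pow2(5) == 8 and rte_align32pow2(8) == 8.
+ * Note that rte_align32pow2(0) wraps around and also returns 0.
+ */
+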
+/**
+ * Aligns input parameter to the previous power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint32_t
+rte_align32prevpow2(uint32_t x)
+{
+ x = rte_combine32ms1b(x);
+
+ return x - (x >> 1);
+}
+
+/**
+ * Aligns 64b input parameter to the next power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the next power of 2
+ */
+static inline uint64_t
+rte_align64pow2(uint64_t v)
+{
+ v--;
+ v = rte_combine64ms1b(v);
+
+ return v + 1;
+}
+
+/**
+ * Aligns 64b input parameter to the previous power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint64_t
+rte_align64prevpow2(uint64_t v)
+{
+ v = rte_combine64ms1b(v);
+
+ return v - (v >> 1);
+}
+
+/*********** Macros for calculating min and max **********/
+
+/**
+ * Macro to return the minimum of two numbers
+ */
+#define RTE_MIN(a, b) \
+ __extension__ ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+
+/**
+ * Macro to return the maximum of two numbers
+ */
+#define RTE_MAX(a, b) \
+ __extension__ ({ \
+ typeof (a) _a = (a); \
+ typeof (b) _b = (b); \
+ _a > _b ? _a : _b; \
+ })
+
+/*********** Other general functions / macros ********/
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ * input parameter, should not be zero.
+ * @return
+ * least significant set bit in the input parameter.
+ */
+static inline uint32_t
+rte_bsf32(uint32_t v)
+{
+ return (uint32_t)__builtin_ctz(v);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero). Safe version (checks for input parameter being zero).
+ *
+ * @warning ``pos`` must be a valid pointer. It is not checked!
+ *
+ * @param v
+ * The input parameter.
+ * @param pos
+ * If ``v`` was not 0, this value will contain position of least significant
+ * bit within the input parameter.
+ * @return
+ * Returns 0 if ``v`` was 0, otherwise returns 1.
+ */
+static inline int
+rte_bsf32_safe(uint64_t v, uint32_t *pos)
+{
+ if (v == 0)
+ return 0;
+
+ *pos = rte_bsf32(v);
+ return 1;
+}
+
+/**
+ * Return the rounded-up log2 of a 32-bit integer.
+ *
+ * @note Contrary to the logarithm mathematical operation,
+ * rte_log2_u32(0) == 0 and not -inf.
+ *
+ * @param v
+ * The input parameter.
+ * @return
+ * The rounded-up log2 of the input, or 0 if the input is 0.
+ */
+static inline uint32_t
+rte_log2_u32(uint32_t v)
+{
+ if (v == 0)
+ return 0;
+ v = rte_align32pow2(v);
+ return rte_bsf32(v);
+}
+
+
+/**
+ * Return the last (most-significant) bit set.
+ *
+ * @note The last (most significant) bit is at position 32.
+ * @note rte_fls_u32(0) = 0, rte_fls_u32(1) = 1, rte_fls_u32(0x80000000) = 32
+ *
+ * @param x
+ * The input parameter.
+ * @return
+ * The last (most-significant) bit set, or 0 if the input is 0.
+ */
+static inline int
+rte_fls_u32(uint32_t x)
+{
+ return (x == 0) ? 0 : 32 - __builtin_clz(x);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the return
+ * value is undefined.
+ * @param v
+ * input parameter, should not be zero.
+ * @return
+ * least significant set bit in the input parameter.
+ */
+static inline int
+rte_bsf64(uint64_t v)
+{
+ return (uint32_t)__builtin_ctzll(v);
+}
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero). Safe version (checks for input parameter being zero).
+ *
+ * @warning ``pos`` must be a valid pointer. It is not checked!
+ *
+ * @param v
+ * The input parameter.
+ * @param pos
+ * If ``v`` was not 0, this value will contain position of least significant
+ * bit within the input parameter.
+ * @return
+ * Returns 0 if ``v`` was 0, otherwise returns 1.
+ */
+static inline int
+rte_bsf64_safe(uint64_t v, uint32_t *pos)
+{
+ if (v == 0)
+ return 0;
+
+ *pos = rte_bsf64(v);
+ return 1;
+}
+
+/**
+ * Return the last (most-significant) bit set.
+ *
+ * @note The last (most significant) bit is at position 64.
+ * @note rte_fls_u64(0) = 0, rte_fls_u64(1) = 1,
+ * rte_fls_u64(0x8000000000000000) = 64
+ *
+ * @param x
+ * The input parameter.
+ * @return
+ * The last (most-significant) bit set, or 0 if the input is 0.
+ */
+static inline int
+rte_fls_u64(uint64_t x)
+{
+ return (x == 0) ? 0 : 64 - __builtin_clzll(x);
+}
+
+/**
+ * Return the rounded-up log2 of a 64-bit integer.
+ *
+ * @note Contrary to the logarithm mathematical operation,
+ * rte_log2_u64(0) == 0 and not -inf.
+ *
+ * @param v
+ * The input parameter.
+ * @return
+ * The rounded-up log2 of the input, or 0 if the input is 0.
+ */
+static inline uint32_t
+rte_log2_u64(uint64_t v)
+{
+ if (v == 0)
+ return 0;
+ v = rte_align64pow2(v);
+ /* we checked for v being 0 already, so no undefined behavior */
+ return rte_bsf64(v);
+}
+
+#ifndef offsetof
+/** Return the offset of a field in a structure. */
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+/**
+ * Return pointer to the wrapping struct instance.
+ *
+ * Example:
+ *
+ * struct wrapper {
+ * ...
+ * struct child c;
+ * ...
+ * };
+ *
+ * struct child *x = obtain(...);
+ * struct wrapper *w = container_of(x, struct wrapper, c);
+ */
+#ifndef container_of
+#define container_of(ptr, type, member) __extension__ ({ \
+ const typeof(((type *)0)->member) *_ptr = (ptr); \
+ __rte_unused type *_target_ptr = \
+ (type *)(ptr); \
+ (type *)(((uintptr_t)_ptr) - offsetof(type, member)); \
+ })
+#endif
+
+/**
+ * Get the size of a field in a structure.
+ *
+ * @param type
+ * The type of the structure.
+ * @param field
+ * The field in the structure.
+ * @return
+ * The size of the field in the structure, in bytes.
+ */
+#define RTE_SIZEOF_FIELD(type, field) (sizeof(((type *)0)->field))
+
+#define _RTE_STR(x) #x
+/** Take a macro value and get a string version of it */
+#define RTE_STR(x) _RTE_STR(x)
+
+/**
+ * ISO C helpers to modify format strings using variadic macros.
+ * This is a replacement for the ", ## __VA_ARGS__" GNU extension.
+ * An empty %s argument is appended to avoid a dangling comma.
+ */
+#define RTE_FMT(fmt, ...) fmt "%.0s", __VA_ARGS__ ""
+#define RTE_FMT_HEAD(fmt, ...) fmt
+#define RTE_FMT_TAIL(fmt, ...) __VA_ARGS__
+
+/** Mask value of type "tp" with the first "ln" bits set. */
+#define RTE_LEN2MASK(ln, tp) \
+ ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln))))
+
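+/*
+ * Worked example: RTE_LEN2MASK(6, uint8_t) == 0x3f, i.e. the six
+ * low-order bits set.
+ */
+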
+/** Number of elements in the array. */
+#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0]))
+
+/**
+ * Converts a numeric string to the equivalent uint64_t value.
+ * As well as straight number conversion, also recognises the suffixes
+ * k, m and g for kilobytes, megabytes and gigabytes respectively.
+ *
+ * If a negative number is passed in, i.e. a string with the first non-blank
+ * character being "-", zero is returned. Zero is also returned in the case of
+ * an error with the strtoull call in the function.
+ *
+ * @param str
+ * String containing number to convert.
+ * @return
+ * Number.
+ */
+#if 0
+static inline uint64_t
+rte_str_to_size(const char *str)
+{
+ char *endptr;
+ unsigned long long size;
+
+ while (isspace((int)*str))
+ str++;
+ if (*str == '-')
+ return 0;
+
+ errno = 0;
+ size = strtoull(str, &endptr, 0);
+ if (errno)
+ return 0;
+
+ if (*endptr == ' ')
+ endptr++; /* allow 1 space gap */
+
+ switch (*endptr){
+ case 'G': case 'g': size *= 1024; /* fall-through */
+ case 'M': case 'm': size *= 1024; /* fall-through */
+ case 'K': case 'k': size *= 1024; /* fall-through */
+ default:
+ break;
+ }
+ return size;
+}
+#endif
+
+/**
+ * Function to terminate the application immediately, printing an error
+ * message and returning the exit_code back to the shell.
+ *
+ * This function never returns
+ *
+ * @param exit_code
+ * The exit code to be returned by the application
+ * @param format
+ * The format string to be used for printing the message. This can include
+ * printf format characters which will be expanded using any further parameters
+ * to the function.
+ */
+__rte_noreturn void
+rte_exit(int exit_code, const char *format, ...)
+ __rte_format_printf(2, 3);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Index: sys/contrib/dpdk_rte_lpm/rte_debug.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_debug.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_DEBUG_H_
+#define _RTE_DEBUG_H_
+
+/**
+ * @file
+ *
+ * Debug Functions in RTE
+ *
+ * This file defines a generic API for debug operations. Part of
+ * the implementation is architecture-specific.
+ */
+
+//#include "rte_log.h"
+#include "rte_branch_prediction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Dump the stack of the calling core to the console.
+ */
+void rte_dump_stack(void);
+
+/**
+ * Dump the registers of the calling core to the console.
+ *
+ * Note: Not implemented in a userapp environment; use gdb instead.
+ */
+void rte_dump_registers(void);
+
+/**
+ * Provide notification of a critical non-recoverable error and terminate
+ * execution abnormally.
+ *
+ * Display the format string and its expanded arguments (printf-like).
+ *
+ * In a linux environment, this function dumps the stack and calls
+ * abort() resulting in a core dump if enabled.
+ *
+ * The function never returns.
+ *
+ * @param ...
+ * The format string, followed by the variable list of arguments.
+ */
+#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
+#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
+
+#ifdef RTE_ENABLE_ASSERT
+#define RTE_ASSERT(exp) RTE_VERIFY(exp)
+#else
+#define RTE_ASSERT(exp) do {} while (0)
+#endif
+#define RTE_VERIFY(exp) do { \
+ if (unlikely(!(exp))) \
+ rte_panic("line %d\tassert \"%s\" failed\n", __LINE__, #exp); \
+} while (0)
+
+/*
+ * Provide notification of a critical non-recoverable error and stop.
+ *
+ * This function should not be called directly. Refer to rte_panic() macro
+ * documentation.
+ */
+void __rte_panic(const char *funcname, const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+	__rte_cold
+#endif
+#endif
+	//__rte_noreturn
+	//__rte_format_printf(2, 3);
+	;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_DEBUG_H_ */
Index: sys/contrib/dpdk_rte_lpm/rte_jhash.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_jhash.h
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation.
+ */
+
+#ifndef _RTE_JHASH_H
+#define _RTE_JHASH_H
+
+/**
+ * @file
+ *
+ * jhash functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#include <rte_byteorder.h>
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions. Routines to test the hash are included
+ * if SELF_TEST is defined. You can use this free for any purpose. It's in
+ * the public domain. It has no warranty.
+ *
+ * $FreeBSD$
+ */
+
+#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k))))
+
+/** @internal Internal function. NOTE: Arguments are modified. */
+#define __rte_jhash_mix(a, b, c) do { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c, 16); c += b; \
+ b -= a; b ^= rot(a, 19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+ c ^= b; c -= rot(b, 14); \
+ a ^= c; a -= rot(c, 11); \
+ b ^= a; b -= rot(a, 25); \
+ c ^= b; c -= rot(b, 16); \
+ a ^= c; a -= rot(c, 4); \
+ b ^= a; b -= rot(a, 14); \
+ c ^= b; c -= rot(b, 24); \
+} while (0)
+
+/** The golden ratio: an arbitrary value. */
+#define RTE_JHASH_GOLDEN_RATIO 0xdeadbeef
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k))))
+#else
+#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k))))
+#endif
+
+#define LOWER8b_MASK rte_le_to_cpu_32(0xff)
+#define LOWER16b_MASK rte_le_to_cpu_32(0xffff)
+#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff)
+
+static inline void
+__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc,
+ uint32_t *pb, unsigned check_align)
+{
+ uint32_t a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc;
+ c += *pb;
+
+ /*
+	 * Check key alignment. For x86 architectures, the first case is always
+	 * optimal. If check_align is not set, the first case will be used.
+ */
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32)
+ const uint32_t *k = (const uint32_t *)key;
+ const uint32_t s = 0;
+#else
+ const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3);
+ const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT;
+#endif
+ if (!check_align || s == 0) {
+ while (length > 12) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+
+ __rte_jhash_mix(a, b, c);
+
+ k += 3;
+ length -= 12;
+ }
+
+ switch (length) {
+ case 12:
+ c += k[2]; b += k[1]; a += k[0]; break;
+ case 11:
+ c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break;
+ case 10:
+ c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break;
+ case 9:
+ c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break;
+ case 8:
+ b += k[1]; a += k[0]; break;
+ case 7:
+ b += k[1] & LOWER24b_MASK; a += k[0]; break;
+ case 6:
+ b += k[1] & LOWER16b_MASK; a += k[0]; break;
+ case 5:
+ b += k[1] & LOWER8b_MASK; a += k[0]; break;
+ case 4:
+ a += k[0]; break;
+ case 3:
+ a += k[0] & LOWER24b_MASK; break;
+ case 2:
+ a += k[0] & LOWER16b_MASK; break;
+ case 1:
+ a += k[0] & LOWER8b_MASK; break;
+ /* zero length strings require no mixing */
+ case 0:
+ *pc = c;
+ *pb = b;
+ return;
+ };
+ } else {
+ /* all but the last block: affect some 32 bits of (a, b, c) */
+ while (length > 12) {
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ c += BIT_SHIFT(k[2], k[3], s);
+ __rte_jhash_mix(a, b, c);
+
+ k += 3;
+ length -= 12;
+ }
+
+ /* last block: affect all 32 bits of (c) */
+ switch (length) {
+ case 12:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ c += BIT_SHIFT(k[2], k[3], s);
+ break;
+ case 11:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK;
+ break;
+ case 10:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK;
+ break;
+ case 9:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK;
+ break;
+ case 8:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s);
+ break;
+ case 7:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK;
+ break;
+ case 6:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK;
+ break;
+ case 5:
+ a += BIT_SHIFT(k[0], k[1], s);
+ b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK;
+ break;
+ case 4:
+ a += BIT_SHIFT(k[0], k[1], s);
+ break;
+ case 3:
+ a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK;
+ break;
+ case 2:
+ a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK;
+ break;
+ case 1:
+ a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK;
+ break;
+ /* zero length strings require no mixing */
+ case 0:
+ *pc = c;
+ *pb = b;
+ return;
+ }
+ }
+
+ __rte_jhash_final(a, b, c);
+
+ *pc = c;
+ *pb = b;
+}
+
+/**
+ * Same as rte_jhash, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash.
+ *
+ * @param key
+ * Key to calculate hash of.
+ * @param length
+ * Length of key in bytes.
+ * @param pc
+ * IN: seed OUT: primary hash value.
+ * @param pb
+ * IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+ __rte_jhash_2hashes(key, length, pc, pb, 1);
+}
+
+/**
+ * Same as rte_jhash_32b, but takes two seeds and returns two uint32_ts.
+ * pc and pb must be non-null, and *pc and *pb must both be initialized
+ * with seeds. If you pass in (*pb)=0, the output (*pc) will be
+ * the same as the return value from rte_jhash_32b.
+ *
+ * @param k
+ * Key to calculate hash of.
+ * @param length
+ * Length of key in units of 4 bytes.
+ * @param pc
+ * IN: seed OUT: primary hash value.
+ * @param pb
+ * IN: second seed OUT: secondary hash value.
+ */
+static inline void
+rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb)
+{
+ __rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0);
+}
+
+/**
+ * The most generic version, hashes an arbitrary sequence
+ * of bytes. No alignment or length assumptions are made about
+ * the input key. For keys not aligned to four byte boundaries
+ * or a multiple of four bytes in length, the memory region
+ * just after may be read (but not used in the computation).
+ * This may cross a page boundary.
+ *
+ * @param key
+ * Key to calculate hash of.
+ * @param length
+ * Length of key in bytes.
+ * @param initval
+ * Initialising value of hash.
+ * @return
+ * Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash(const void *key, uint32_t length, uint32_t initval)
+{
+ uint32_t initval2 = 0;
+
+ rte_jhash_2hashes(key, length, &initval, &initval2);
+
+ return initval;
+}
+
+/**
+ * A special optimized version that handles one or more uint32_ts.
+ * The length parameter here is the number of uint32_ts in the key.
+ *
+ * @param k
+ * Key to calculate hash of.
+ * @param length
+ * Length of key in units of 4 bytes.
+ * @param initval
+ * Initialising value of hash.
+ * @return
+ * Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval)
+{
+ uint32_t initval2 = 0;
+
+ rte_jhash_32b_2hashes(k, length, &initval, &initval2);
+
+ return initval;
+}
+
+static inline uint32_t
+__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+ a += RTE_JHASH_GOLDEN_RATIO + initval;
+ b += RTE_JHASH_GOLDEN_RATIO + initval;
+ c += RTE_JHASH_GOLDEN_RATIO + initval;
+
+ __rte_jhash_final(a, b, c);
+
+ return c;
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 3 words.
+ *
+ * @param a
+ * First word to calculate hash of.
+ * @param b
+ * Second word to calculate hash of.
+ * @param c
+ * Third word to calculate hash of.
+ * @param initval
+ * Initialising value of hash.
+ * @return
+ * Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
+{
+ return __rte_jhash_3words(a + 12, b + 12, c + 12, initval);
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 2 words.
+ *
+ * @param a
+ * First word to calculate hash of.
+ * @param b
+ * Second word to calculate hash of.
+ * @param initval
+ * Initialising value of hash.
+ * @return
+ * Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
+{
+ return __rte_jhash_3words(a + 8, b + 8, 8, initval);
+}
+
+/**
+ * A special ultra-optimized version that knows it is hashing exactly
+ * 1 word.
+ *
+ * @param a
+ * Word to calculate hash of.
+ * @param initval
+ * Initialising value of hash.
+ * @return
+ * Calculated hash value.
+ */
+static inline uint32_t
+rte_jhash_1word(uint32_t a, uint32_t initval)
+{
+ return __rte_jhash_3words(a + 4, 4, 4, initval);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_JHASH_H */
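
For illustration, a small userland sketch of the hashing entry points above (not part of the patch; it assumes the build environment supplies the arch and byte-order definitions that the commented-out rte_byteorder.h include would normally provide):

	#include <limits.h>
	#include <stdint.h>
	#include <stdio.h>
	#include "rte_jhash.h"

	int
	main(void)
	{
		/* A 12-byte, word-aligned flow key: src IP, dst IP, ports. */
		uint32_t key[3] = { 0x0a000001, 0x0a000002, 0x00500050 };

		/*
		 * The byte-wise and word-wise entry points agree on aligned,
		 * multiple-of-four keys: both walk the same 32-bit words.
		 */
		uint32_t h1 = rte_jhash(key, sizeof(key), 0);
		uint32_t h2 = rte_jhash_32b(key, 3, 0);

		printf("%#x %#x\n", h1, h2);	/* prints identical values */
		return (0);
	}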
Index: sys/contrib/dpdk_rte_lpm/rte_log.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_log.h
@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+
+#ifndef _RTE_LOG_H_
+#define _RTE_LOG_H_
+
+/**
+ * @file
+ *
+ * RTE Logs API
+ *
+ * This file provides a log API to RTE applications.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_config.h>
+#include <rte_compat.h>
+
+struct rte_log_dynamic_type;
+
+/** The rte_log structure. */
+struct rte_logs {
+ uint32_t type; /**< Bitfield with enabled logs. */
+ uint32_t level; /**< Log level. */
+ FILE *file; /**< Output file set by rte_openlog_stream, or NULL. */
+ size_t dynamic_types_len;
+ struct rte_log_dynamic_type *dynamic_types;
+};
+
+/** Global log information */
+extern struct rte_logs rte_logs;
+
+/* SDK log type */
+#define RTE_LOGTYPE_EAL 0 /**< Log related to eal. */
+#define RTE_LOGTYPE_MALLOC 1 /**< Log related to malloc. */
+#define RTE_LOGTYPE_RING 2 /**< Log related to ring. */
+#define RTE_LOGTYPE_MEMPOOL 3 /**< Log related to mempool. */
+#define RTE_LOGTYPE_TIMER 4 /**< Log related to timers. */
+#define RTE_LOGTYPE_PMD 5 /**< Log related to poll mode driver. */
+#define RTE_LOGTYPE_HASH 6 /**< Log related to hash table. */
+#define RTE_LOGTYPE_LPM 7 /**< Log related to LPM. */
+#define RTE_LOGTYPE_KNI 8 /**< Log related to KNI. */
+#define RTE_LOGTYPE_ACL 9 /**< Log related to ACL. */
+#define RTE_LOGTYPE_POWER 10 /**< Log related to power. */
+#define RTE_LOGTYPE_METER 11 /**< Log related to QoS meter. */
+#define RTE_LOGTYPE_SCHED 12 /**< Log related to QoS port scheduler. */
+#define RTE_LOGTYPE_PORT 13 /**< Log related to port. */
+#define RTE_LOGTYPE_TABLE 14 /**< Log related to table. */
+#define RTE_LOGTYPE_PIPELINE 15 /**< Log related to pipeline. */
+#define RTE_LOGTYPE_MBUF 16 /**< Log related to mbuf. */
+#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
+#define RTE_LOGTYPE_EFD 18 /**< Log related to EFD. */
+#define RTE_LOGTYPE_EVENTDEV 19 /**< Log related to eventdev. */
+#define RTE_LOGTYPE_GSO 20 /**< Log related to GSO. */
+
+/* these log types can be used in an application */
+#define RTE_LOGTYPE_USER1 24 /**< User-defined log type 1. */
+#define RTE_LOGTYPE_USER2 25 /**< User-defined log type 2. */
+#define RTE_LOGTYPE_USER3 26 /**< User-defined log type 3. */
+#define RTE_LOGTYPE_USER4 27 /**< User-defined log type 4. */
+#define RTE_LOGTYPE_USER5 28 /**< User-defined log type 5. */
+#define RTE_LOGTYPE_USER6 29 /**< User-defined log type 6. */
+#define RTE_LOGTYPE_USER7 30 /**< User-defined log type 7. */
+#define RTE_LOGTYPE_USER8 31 /**< User-defined log type 8. */
+
+/** First identifier for extended logs */
+#define RTE_LOGTYPE_FIRST_EXT_ID 32
+
+/* Can't use 0, as it gives compiler warnings */
+#define RTE_LOG_EMERG 1U /**< System is unusable. */
+#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */
+#define RTE_LOG_CRIT 3U /**< Critical conditions. */
+#define RTE_LOG_ERR 4U /**< Error conditions. */
+#define RTE_LOG_WARNING 5U /**< Warning conditions. */
+#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */
+#define RTE_LOG_INFO 7U /**< Informational. */
+#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */
+
+/**
+ * Change the stream that will be used by the logging system.
+ *
+ * This can be done at any time. The f argument represents the stream
+ * to be used to send the logs. If f is NULL, the default output is
+ * used (stderr).
+ *
+ * @param f
+ * Pointer to the stream.
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int rte_openlog_stream(FILE *f);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the stream used by the logging system (see rte_openlog_stream()
+ * to change it).
+ *
+ * @return
+ * Pointer to the stream.
+ */
+__rte_experimental
+FILE *rte_log_get_stream(void);
+
+/**
+ * Set the global log level.
+ *
+ * After this call, logs with a level lower or equal than the level
+ * passed as argument will be displayed.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ */
+void rte_log_set_global_level(uint32_t level);
+
+/**
+ * Get the global log level.
+ *
+ * @return
+ * The current global log level.
+ */
+uint32_t rte_log_get_global_level(void);
+
+/**
+ * Get the log level for a given type.
+ *
+ * @param logtype
+ * The log type identifier.
+ * @return
+ * 0 on success, a negative value if logtype is invalid.
+ */
+int rte_log_get_level(uint32_t logtype);
+
+/**
+ * For a given `logtype`, check if a log with `loglevel` can be printed.
+ *
+ * @param logtype
+ * The log type identifier
+ * @param loglevel
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @return
+ * Returns 'true' if log can be printed and 'false' if it can't.
+ */
+__rte_experimental
+bool rte_log_can_log(uint32_t logtype, uint32_t loglevel);
+
+/**
+ * Set the log level for a given type based on shell pattern.
+ *
+ * @param pattern
+ * The match pattern identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_pattern(const char *pattern, uint32_t level);
+
+/**
+ * Set the log level for a given type based on regular expression.
+ *
+ * @param regex
+ * The regular expression identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_regexp(const char *regex, uint32_t level);
+
+/**
+ * Set the log level for a given type.
+ *
+ * @param logtype
+ * The log type identifier.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if logtype or level is invalid.
+ */
+int rte_log_set_level(uint32_t logtype, uint32_t level);
+
+/**
+ * Get the current loglevel for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The loglevel of the message being processed.
+ */
+int rte_log_cur_msg_loglevel(void);
+
+/**
+ * Get the current logtype for the message being processed.
+ *
+ * Before calling the user-defined stream for logging, the log
+ * subsystem sets a per-lcore variable containing the loglevel and the
+ * logtype of the message being processed. This information can be
+ * accessed by the user-defined log output function through this
+ * function.
+ *
+ * @return
+ * The logtype of the message being processed.
+ */
+int rte_log_cur_msg_logtype(void);
+
+/**
+ * Register a dynamic log type
+ *
+ * If a log is already registered with the same type, the returned value
+ * is the same as the previous one.
+ *
+ * @param name
+ * The string identifying the log type.
+ * @return
+ * - >0: success, the returned value is the log type identifier.
+ * - (-ENOMEM): cannot allocate memory.
+ */
+int rte_log_register(const char *name);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a dynamic log type and try to pick its level from EAL options
+ *
+ * rte_log_register() is called inside. If successful, the function tries
+ * to search for matching regexp in the list of EAL log level options and
+ * pick the level from the last matching entry. If nothing can be applied
+ * from the list, the level will be set to the user-defined default value.
+ *
+ * @param name
+ * Name for the log type to be registered
+ * @param level_def
+ * Fallback level to be set if the global list has no matching options
+ * @return
+ * - >=0: the newly registered log type
+ * - <0: rte_log_register() error value
+ */
+__rte_experimental
+int rte_log_register_type_and_pick_level(const char *name, uint32_t level_def);
+
+/**
+ * Dump log information.
+ *
+ * Dump the global level and the registered log types.
+ *
+ * @param f
+ * The output stream where the dump should be sent.
+ */
+void rte_log_dump(FILE *f);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable.
+ *
+ * The preferred alternative is RTE_LOG(), because it adds the
+ * level and type to the logged string.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_log(uint32_t level, uint32_t logtype, const char *format, ...)
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2))
+ __rte_cold
+#endif
+#endif
+ __rte_format_printf(3, 4);
+
+/**
+ * Generates a log message.
+ *
+ * The message will be sent in the stream defined by the previous call
+ * to rte_openlog_stream().
+ *
+ * The level argument determines if the log should be displayed or
+ * not, depending on the global rte_logs variable. A trailing
+ * newline may be added if needed.
+ *
+ * The preferred alternative is RTE_LOG(), because it adds the
+ * level and type to the logged string.
+ *
+ * @param level
+ * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8).
+ * @param logtype
+ * The log type, for example, RTE_LOGTYPE_EAL.
+ * @param format
+ * The format string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @param ap
+ * The va_list of the variable arguments required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap)
+ __rte_format_printf(3, 0);
+
+/**
+ * Generates a log message.
+ *
+ * RTE_LOG() is a helper that prefixes the string with the log level
+ * and type, and calls rte_log().
+ *
+ * @param l
+ * Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ * expanded by the macro, so it cannot be an integer value.
+ * @param t
+ * The log type, for example, EAL. The short name is expanded by the
+ * macro, so it cannot be an integer value.
+ * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+#define RTE_LOG(l, t, ...) \
+ rte_log(RTE_LOG_ ## l, \
+ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__)
+
+/**
+ * Generates a log message for data path.
+ *
+ * Similar to RTE_LOG(), except that it is removed at compilation time
+ * if the RTE_LOG_DP_LEVEL configuration option is lower than the log
+ * level argument.
+ *
+ * @param l
+ * Log level. A value between EMERG (1) and DEBUG (8). The short name is
+ * expanded by the macro, so it cannot be an integer value.
+ * @param t
+ * The log type, for example, EAL. The short name is expanded by the
+ * macro, so it cannot be an integer value.
+ * @param ...
+ * The fmt string, as in printf(3), followed by the variable arguments
+ * required by the format.
+ * @return
+ * - 0: Success.
+ * - Negative on error.
+ */
+#define RTE_LOG_DP(l, t, ...) \
+ (void)((RTE_LOG_ ## l <= RTE_LOG_DP_LEVEL) ? \
+ rte_log(RTE_LOG_ ## l, \
+ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \
+ 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LOG_H_ */
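
For illustration, how the RTE_LOG() token pasting above expands in a caller (not part of the patch; the function name is hypothetical):

	#include "rte_log.h"

	static void
	report_tbl8_exhaustion(unsigned long wanted)
	{
		/*
		 * Expands to:
		 *   rte_log(RTE_LOG_ERR, RTE_LOGTYPE_LPM,
		 *       "LPM" ": " "out of tbl8 groups (%lu wanted)\n", wanted);
		 */
		RTE_LOG(ERR, LPM, "out of tbl8 groups (%lu wanted)\n", wanted);
	}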
Index: sys/contrib/dpdk_rte_lpm/rte_lpm.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_lpm.h
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_LPM_H_
+#define _RTE_LPM_H_
+
+/**
+ * @file
+ * RTE Longest Prefix Match (LPM)
+ */
+
+/*
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <rte_branch_prediction.h>
+#include <rte_byteorder.h>
+#include <rte_config.h>
+#include <rte_memory.h>
+#include <rte_common.h>
+#include <rte_vect.h>
+*/
+#include "rte_branch_prediction.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Max number of characters in LPM name. */
+#define RTE_LPM_NAMESIZE 32
+
+/** Maximum depth value possible for IPv4 LPM. */
+#define RTE_LPM_MAX_DEPTH 32
+
+/** @internal Total number of tbl24 entries. */
+#define RTE_LPM_TBL24_NUM_ENTRIES (1 << 24)
+
+/** @internal Number of entries in a tbl8 group. */
+#define RTE_LPM_TBL8_GROUP_NUM_ENTRIES 256
+
+/** @internal Max number of tbl8 groups in the tbl8. */
+#define RTE_LPM_MAX_TBL8_NUM_GROUPS (1 << 24)
+
+/** @internal Total number of tbl8 groups in the tbl8. */
+#define RTE_LPM_TBL8_NUM_GROUPS 256
+
+/** @internal Total number of tbl8 entries. */
+#define RTE_LPM_TBL8_NUM_ENTRIES (RTE_LPM_TBL8_NUM_GROUPS * \
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES)
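+/* With the defaults above: 256 groups * 256 entries = 65536 tbl8 entries. */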
+
+/** @internal Macro to enable/disable run-time checks. */
+#if defined(RTE_LIBRTE_LPM_DEBUG)
+#define RTE_LPM_RETURN_IF_TRUE(cond, retval) do { \
+ if (cond) return (retval); \
+} while (0)
+#else
+#define RTE_LPM_RETURN_IF_TRUE(cond, retval)
+#endif
+
+/** @internal bitmask with valid and valid_group fields set */
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+/** Bitmask used to indicate successful lookup */
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+/** @internal Tbl24 entry structure. */
+__extension__
+struct rte_lpm_tbl_entry {
+ /**
+ * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or
+ * a group index pointing to a tbl8 structure (tbl24 only, when
+ * valid_group is set)
+ */
+ uint32_t next_hop :24;
+	/* The following three fields pack into the remaining 8 bits. */
+ uint32_t valid :1; /**< Validation flag. */
+ /**
+ * For tbl24:
+ * - valid_group == 0: entry stores a next hop
+ * - valid_group == 1: entry stores a group_index pointing to a tbl8
+ * For tbl8:
+ * - valid_group indicates whether the current tbl8 is in use or not
+ */
+ uint32_t valid_group :1;
+ uint32_t depth :6; /**< Rule depth. */
+};
+
+#else
+
+__extension__
+struct rte_lpm_tbl_entry {
+ uint32_t depth :6;
+ uint32_t valid_group :1;
+ uint32_t valid :1;
+ uint32_t next_hop :24;
+
+};
+
+#endif
+
+/** LPM configuration structure. */
+struct rte_lpm_config {
+ uint32_t max_rules; /**< Max number of rules. */
+ uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */
+ int flags; /**< This field is currently unused. */
+};
+
+/** @internal Rule structure. */
+struct rte_lpm_rule {
+ uint32_t ip; /**< Rule IP address. */
+ uint32_t next_hop; /**< Rule next hop. */
+};
+
+/** @internal Contains metadata about the rules table. */
+struct rte_lpm_rule_info {
+ uint32_t used_rules; /**< Used rules so far. */
+ uint32_t first_rule; /**< Indexes the first rule of a given depth. */
+};
+
+struct nhop_object;
+struct rte_lpm_external {
+ struct nhop_object **nh_idx; /**< # -> idx mappings */
+ uint32_t default_idx; /* nhop index of default route */
+ uint32_t fibnum; /* fib index */
+};
+
+/** @internal LPM structure. */
+struct rte_lpm {
+ /* LPM metadata. */
+ struct rte_lpm_external ext;
+ char name[RTE_LPM_NAMESIZE]; /**< Name of the lpm. */
+ uint32_t max_rules; /**< Max. balanced rules per lpm. */
+ uint32_t number_tbl8s; /**< Number of tbl8s. */
+ struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */
+
+ /* LPM Tables. */
+ struct rte_lpm_tbl_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES]
+ __rte_cache_aligned; /**< LPM tbl24 table. */
+ struct rte_lpm_tbl_entry *tbl8; /**< LPM tbl8 table. */
+ struct rte_lpm_rule *rules_tbl; /**< LPM rules. */
+};
+
+/**
+ * Create an LPM object.
+ *
+ * @param name
+ * LPM object name
+ * @param socket_id
+ * NUMA socket ID for LPM table memory allocation
+ * @param config
+ * Structure containing the configuration
+ * @return
+ * Handle to LPM object on success, NULL otherwise with rte_errno set
+ * to an appropriate values. Possible rte_errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - EINVAL - invalid parameter passed to function
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_lpm *
+rte_lpm_create(const char *name, int socket_id,
+ const struct rte_lpm_config *config);
+
+/**
+ * Find an existing LPM object and return a pointer to it.
+ *
+ * @param name
+ * Name of the lpm object as passed to rte_lpm_create()
+ * @return
+ * Pointer to lpm object or NULL if object not found with rte_errno
+ * set appropriately. Possible rte_errno values include:
+ * - ENOENT - required entry not available to return.
+ */
+struct rte_lpm *
+rte_lpm_find_existing(const char *name);
+
+/**
+ * Free an LPM object.
+ *
+ * @param lpm
+ * LPM object handle
+ * @return
+ * None
+ */
+void
+rte_lpm_free(struct rte_lpm *lpm);
+
+/**
+ * Add a rule to the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be added to the LPM table
+ * @param depth
+ * Depth of the rule to be added to the LPM table
+ * @param next_hop
+ * Next hop of the rule to be added to the LPM table
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint32_t next_hop);
+
+/**
+ * Check if a rule is present in the LPM table,
+ * and provide its next hop if it is.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be searched
+ * @param depth
+ *   Depth of the rule to be searched
+ * @param next_hop
+ * Next hop of the rule (valid only if it is found)
+ * @return
+ * 1 if the rule exists, 0 if it does not, a negative value on failure
+ */
+int
+rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+uint32_t *next_hop);
+
+/**
+ * Delete a rule from the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be deleted from the LPM table
+ * @param depth
+ * Depth of the rule to be deleted from the LPM table
+ * @param sub_rule_depth
+ *   Depth of the parent (covering) rule
+ * @param sub_rule_nhop
+ *   Nexthop index of the parent rule
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+ uint8_t sub_rule_depth, uint32_t sub_rule_nhop);
+
+/**
+ * Delete all rules from the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ */
+void
+rte_lpm_delete_all(struct rte_lpm *lpm);
+
+/**
+ * Lookup an IP into the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP to be looked up in the LPM table
+ * @param next_hop
+ * Next hop of the most specific rule found for IP (valid on lookup hit only)
+ * @return
+ * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
+ */
+static inline int
+rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop)
+{
+ unsigned tbl24_index = (ip >> 8);
+ uint32_t tbl_entry;
+ const uint32_t *ptbl;
+
+ /* DEBUG: Check user input arguments. */
+ RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);
+
+ /* Copy tbl24 entry */
+ ptbl = (const uint32_t *)(&lpm->tbl24[tbl24_index]);
+ tbl_entry = *ptbl;
+
+ /* Memory ordering is not required in lookup. Because dataflow
+ * dependency exists, compiler or HW won't be able to re-order
+ * the operations.
+ */
+ /* Copy tbl8 entry (only if needed) */
+ if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+ unsigned tbl8_index = (uint8_t)ip +
+ (((uint32_t)tbl_entry & 0x00FFFFFF) *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+ ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index];
+ tbl_entry = *ptbl;
+ }
+
+ *next_hop = ((uint32_t)tbl_entry & 0x00FFFFFF);
+ return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT;
+}
+
+/**
+ * Lookup multiple IP addresses in an LPM table. This may be implemented as a
+ * macro, so the address of the function should not be used.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ips
+ * Array of IPs to be looked up in the LPM table
+ * @param next_hops
+ * Next hop of the most specific rule found for IP (valid on lookup hit only).
+ * This is an array of two byte values. The most significant byte in each
+ * value says whether the lookup was successful (bitmask
+ * RTE_LPM_LOOKUP_SUCCESS is set). The least significant byte is the
+ * actual next hop.
+ * @param n
+ * Number of elements in ips (and next_hops) array to lookup. This should be a
+ * compile time constant, and divisible by 8 for best performance.
+ * @return
+ * -EINVAL for incorrect arguments, otherwise 0
+ */
+#define rte_lpm_lookup_bulk(lpm, ips, next_hops, n) \
+ rte_lpm_lookup_bulk_func(lpm, ips, next_hops, n)
+
+static inline int
+rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips,
+ uint32_t *next_hops, const unsigned n)
+{
+ unsigned i;
+ unsigned tbl24_indexes[n];
+ const uint32_t *ptbl;
+
+ /* DEBUG: Check user input arguments. */
+ RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) ||
+ (next_hops == NULL)), -EINVAL);
+
+ for (i = 0; i < n; i++) {
+ tbl24_indexes[i] = ips[i] >> 8;
+ }
+
+ for (i = 0; i < n; i++) {
+ /* Simply copy tbl24 entry to output */
+ ptbl = (const uint32_t *)&lpm->tbl24[tbl24_indexes[i]];
+ next_hops[i] = *ptbl;
+
+ /* Overwrite output with tbl8 entry if needed */
+ if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+ unsigned tbl8_index = (uint8_t)ips[i] +
+ (((uint32_t)next_hops[i] & 0x00FFFFFF) *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+ ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index];
+ next_hops[i] = *ptbl;
+ }
+ }
+ return 0;
+}
+
+/* Mask four results. */
+#define RTE_LPM_MASKX4_RES UINT64_C(0x00ffffff00ffffff)
+
+/**
+ * Lookup four IP addresses in an LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * Four IPs to be looked up in the LPM table
+ * @param hop
+ * Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is a 4-element array of two-byte values.
+ * If the lookup was successful for the given IP, then least significant byte
+ * of the corresponding element is the actual next hop and the most
+ * significant byte is zero.
+ *   If the lookup for the given IP failed, then the corresponding element
+ *   will contain the default value; see the description of the next parameter.
+ * @param defv
+ * Default value to populate into corresponding element of hop[] array,
+ * if lookup would fail.
+ */
+#if 0
+static inline void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4],
+ uint32_t defv);
+
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#include "rte_lpm_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "rte_lpm_altivec.h"
+#else
+#include "rte_lpm_sse.h"
+#endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_LPM_H_ */
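
To make the DIR-24-8 index arithmetic above concrete, a step-by-step restatement of the inline rte_lpm_lookup() (not part of the patch; lookup_one() is a hypothetical name and lpm is assumed to be a populated table):

	#include <errno.h>
	#include "rte_lpm.h"

	static int
	lookup_one(struct rte_lpm *lpm, uint32_t ip, uint32_t *nh)
	{
		/* For ip 0x0a010203 (10.1.2.3) the top 24 bits, 0x0a0102,
		 * index tbl24 directly. */
		uint32_t e = *(const uint32_t *)&lpm->tbl24[ip >> 8];

		if ((e & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
		    RTE_LPM_VALID_EXT_ENTRY_BITMASK) {
			/* Both valid bits set: the low 24 bits hold a tbl8
			 * group index, and the last address byte (0x03)
			 * selects within the 256-entry group. */
			unsigned tbl8_index = (uint8_t)ip +
			    (e & 0x00ffffff) * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;

			e = *(const uint32_t *)&lpm->tbl8[tbl8_index];
		}

		*nh = e & 0x00ffffff;	/* next_hop occupies the low 24 bits */
		return ((e & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT);
	}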
Index: sys/contrib/dpdk_rte_lpm/rte_lpm.c
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_lpm.c
@@ -0,0 +1,1107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <sys/param.h>
+#include <sys/ctype.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+int errno = 0, rte_errno = 0;
+
+#if 0
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_memory.h> /* for definition of RTE_CACHE_LINE_SIZE */
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+#endif
+
+#include "rte_shim.h"
+#include "rte_lpm.h"
+
+#if 0
+TAILQ_HEAD(rte_lpm_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_lpm_tailq = {
+ .name = "RTE_LPM",
+};
+EAL_REGISTER_TAILQ(rte_lpm_tailq)
+#endif
+
+#define MAX_DEPTH_TBL24 24
+
+enum valid_flag {
+ INVALID = 0,
+ VALID
+};
+
+/* Macro to enable/disable run-time checks. */
+#if defined(RTE_LIBRTE_LPM_DEBUG)
+#include <rte_debug.h>
+#define VERIFY_DEPTH(depth) do { \
+ if ((depth == 0) || (depth > RTE_LPM_MAX_DEPTH)) \
+ rte_panic("LPM: Invalid depth (%u) at line %d", \
+ (unsigned)(depth), __LINE__); \
+} while (0)
+#else
+#define VERIFY_DEPTH(depth)
+#endif
+
+/*
+ * Converts a given depth value to its corresponding mask value.
+ *
+ * depth (IN) : range = 1 - 32
+ * mask (OUT) : 32bit mask
+ */
+static uint32_t __attribute__((pure))
+depth_to_mask(uint8_t depth)
+{
+ VERIFY_DEPTH(depth);
+
+ /* To calculate a mask start with a 1 on the left hand side and right
+ * shift while populating the left hand side with 1's
+ */
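+	/* E.g. depth == 24: (int)0x80000000 >> 23 == 0xffffff00, a /24 mask.
+	 * This relies on the sign-extending (arithmetic) right shift that
+	 * gcc and clang implement for signed integers.
+	 */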
+ return (int)0x80000000 >> (depth - 1);
+}
+
+/*
+ * Converts given depth value to its corresponding range value.
+ */
+static uint32_t __attribute__((pure))
+depth_to_range(uint8_t depth)
+{
+ VERIFY_DEPTH(depth);
+
+ /*
+ * Calculate tbl24 range. (Note: 2^depth = 1 << depth)
+ */
+ if (depth <= MAX_DEPTH_TBL24)
+ return 1 << (MAX_DEPTH_TBL24 - depth);
+
+ /* Else if depth is greater than 24 */
+ return 1 << (RTE_LPM_MAX_DEPTH - depth);
+}
+
+#if 0
+/*
+ * Find an existing lpm table and return a pointer to it.
+ */
+struct rte_lpm *
+rte_lpm_find_existing(const char *name)
+{
+ struct rte_lpm *l = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_lpm_list *lpm_list;
+
+ lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+ rte_mcfg_tailq_read_lock();
+ TAILQ_FOREACH(te, lpm_list, next) {
+ l = te->data;
+ if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0)
+ break;
+ }
+ rte_mcfg_tailq_read_unlock();
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ return l;
+}
+#endif
+
+/*
+ * Allocates memory for LPM object
+ */
+struct rte_lpm *
+rte_lpm_create(const char *name, int socket_id,
+ const struct rte_lpm_config *config)
+{
+ char mem_name[RTE_LPM_NAMESIZE];
+ struct rte_lpm *lpm = NULL;
+ //struct rte_tailq_entry *te;
+ uint32_t mem_size, rules_size, tbl8s_size;
+ //struct rte_lpm_list *lpm_list;
+
+ //lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+ RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry) != 4);
+
+ /* Check user arguments. */
+ if ((name == NULL) || (socket_id < -1) || (config->max_rules == 0)
+ || config->number_tbl8s > RTE_LPM_MAX_TBL8_NUM_GROUPS) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
+
+ /* Determine the amount of memory to allocate. */
+ mem_size = sizeof(*lpm);
+ rules_size = sizeof(struct rte_lpm_rule) * config->max_rules;
+ tbl8s_size = (sizeof(struct rte_lpm_tbl_entry) *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
+
+#if 0
+ rte_mcfg_tailq_write_lock();
+
+ /* guarantee there's no existing */
+ TAILQ_FOREACH(te, lpm_list, next) {
+ lpm = te->data;
+ if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
+ break;
+ }
+
+ if (te != NULL) {
+ lpm = NULL;
+ rte_errno = EEXIST;
+ goto exit;
+ }
+
+ /* allocate tailq entry */
+ te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n");
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+#endif
+
+ /* Allocate memory to store the LPM data structures. */
+ lpm = rte_zmalloc_socket(mem_name, mem_size,
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (lpm == NULL) {
+ RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
+ //rte_free(te);
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+
+ lpm->rules_tbl = rte_zmalloc_socket(NULL,
+ (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
+
+ if (lpm->rules_tbl == NULL) {
+ RTE_LOG(ERR, LPM, "LPM rules_tbl memory allocation failed\n");
+ rte_free(lpm);
+ lpm = NULL;
+ //rte_free(te);
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+
+ lpm->tbl8 = rte_zmalloc_socket(NULL,
+ (size_t)tbl8s_size, RTE_CACHE_LINE_SIZE, socket_id);
+
+ if (lpm->tbl8 == NULL) {
+ RTE_LOG(ERR, LPM, "LPM tbl8 memory allocation failed\n");
+ rte_free(lpm->rules_tbl);
+ rte_free(lpm);
+ lpm = NULL;
+ //rte_free(te);
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+
+ /* Save user arguments. */
+ lpm->max_rules = config->max_rules;
+ lpm->number_tbl8s = config->number_tbl8s;
+ strlcpy(lpm->name, name, sizeof(lpm->name));
+
+ //te->data = lpm;
+
+ //TAILQ_INSERT_TAIL(lpm_list, te, next);
+
+exit:
+	//rte_mcfg_tailq_write_unlock();
+
+ return lpm;
+}
+
+/*
+ * Deallocates memory for given LPM table.
+ */
+void
+rte_lpm_free(struct rte_lpm *lpm)
+{
+#if 0
+ struct rte_lpm_list *lpm_list;
+ struct rte_tailq_entry *te;
+
+ /* Check user arguments. */
+ if (lpm == NULL)
+ return;
+
+ lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
+
+ rte_mcfg_tailq_write_lock();
+
+ /* find our tailq entry */
+ TAILQ_FOREACH(te, lpm_list, next) {
+ if (te->data == (void *) lpm)
+ break;
+ }
+ if (te != NULL)
+ TAILQ_REMOVE(lpm_list, te, next);
+
+ rte_mcfg_tailq_write_unlock();
+#endif
+
+ rte_free(lpm->tbl8);
+ rte_free(lpm->rules_tbl);
+ rte_free(lpm);
+ //rte_free(te);
+}
+
+#if 0
+/*
+ * Adds a rule to the rule table.
+ *
+ * NOTE: The rule table is split into 32 groups. Each group contains rules that
+ * apply to a specific prefix depth (i.e. group 1 contains rules that apply to
+ * prefixes with a depth of 1 etc.). In the following code (depth - 1) is used
+ * to refer to depth 1 because even though the depth range is 1 - 32, depths
+ * are stored in the rule table from 0 - 31.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static int32_t
+rule_add(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
+ uint32_t next_hop)
+{
+ uint32_t rule_gindex, rule_index, last_rule;
+ int i;
+
+ VERIFY_DEPTH(depth);
+
+ /* Scan through rule group to see if rule already exists. */
+ if (lpm->rule_info[depth - 1].used_rules > 0) {
+
+ /* rule_gindex stands for rule group index. */
+ rule_gindex = lpm->rule_info[depth - 1].first_rule;
+ /* Initialise rule_index to point to start of rule group. */
+ rule_index = rule_gindex;
+ /* Last rule = Last used rule in this rule group. */
+ last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+ for (; rule_index < last_rule; rule_index++) {
+
+ /* If rule already exists update next hop and return. */
+ if (lpm->rules_tbl[rule_index].ip == ip_masked) {
+
+ if (lpm->rules_tbl[rule_index].next_hop
+ == next_hop)
+ return -EEXIST;
+ lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+ return rule_index;
+ }
+ }
+
+ if (rule_index == lpm->max_rules)
+ return -ENOSPC;
+ } else {
+ /* Calculate the position in which the rule will be stored. */
+ rule_index = 0;
+
+ for (i = depth - 1; i > 0; i--) {
+ if (lpm->rule_info[i - 1].used_rules > 0) {
+ rule_index = lpm->rule_info[i - 1].first_rule
+ + lpm->rule_info[i - 1].used_rules;
+ break;
+ }
+ }
+ if (rule_index == lpm->max_rules)
+ return -ENOSPC;
+
+ lpm->rule_info[depth - 1].first_rule = rule_index;
+ }
+
+ /* Make room for the new rule in the array. */
+ for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) {
+ if (lpm->rule_info[i - 1].first_rule
+ + lpm->rule_info[i - 1].used_rules == lpm->max_rules)
+ return -ENOSPC;
+
+ if (lpm->rule_info[i - 1].used_rules > 0) {
+ lpm->rules_tbl[lpm->rule_info[i - 1].first_rule
+ + lpm->rule_info[i - 1].used_rules]
+ = lpm->rules_tbl[lpm->rule_info[i - 1].first_rule];
+ lpm->rule_info[i - 1].first_rule++;
+ }
+ }
+
+ /* Add the new rule. */
+ lpm->rules_tbl[rule_index].ip = ip_masked;
+ lpm->rules_tbl[rule_index].next_hop = next_hop;
+
+ /* Increment the used rules counter for this rule group. */
+ lpm->rule_info[depth - 1].used_rules++;
+
+ return rule_index;
+}
+
+/*
+ * Delete a rule from the rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static void
+rule_delete(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
+{
+ int i;
+
+ VERIFY_DEPTH(depth);
+
+ lpm->rules_tbl[rule_index] =
+ lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule
+ + lpm->rule_info[depth - 1].used_rules - 1];
+
+ for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) {
+ if (lpm->rule_info[i].used_rules > 0) {
+ lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] =
+ lpm->rules_tbl[lpm->rule_info[i].first_rule
+ + lpm->rule_info[i].used_rules - 1];
+ lpm->rule_info[i].first_rule--;
+ }
+ }
+
+ lpm->rule_info[depth - 1].used_rules--;
+}
+
+/*
+ * Finds a rule in rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
+ */
+static int32_t
+rule_find(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
+{
+ uint32_t rule_gindex, last_rule, rule_index;
+
+ VERIFY_DEPTH(depth);
+
+ rule_gindex = lpm->rule_info[depth - 1].first_rule;
+ last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules;
+
+ /* Scan used rules at given depth to find rule. */
+ for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) {
+ /* If rule is found return the rule index. */
+ if (lpm->rules_tbl[rule_index].ip == ip_masked)
+ return rule_index;
+ }
+
+ /* If rule is not found return -EINVAL. */
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Find, clean and allocate a tbl8.
+ */
+static int32_t
+tbl8_alloc(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
+{
+ uint32_t group_idx; /* tbl8 group index. */
+ struct rte_lpm_tbl_entry *tbl8_entry;
+
+ /* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */
+ for (group_idx = 0; group_idx < number_tbl8s; group_idx++) {
+ tbl8_entry = &tbl8[group_idx * RTE_LPM_TBL8_GROUP_NUM_ENTRIES];
+ /* If a free tbl8 group is found clean it and set as VALID. */
+ if (!tbl8_entry->valid_group) {
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .next_hop = 0,
+ .valid = INVALID,
+ .depth = 0,
+ .valid_group = VALID,
+ };
+
+ memset(&tbl8_entry[0], 0,
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES *
+ sizeof(tbl8_entry[0]));
+
+ __atomic_store(tbl8_entry, &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+
+ /* Return group index for allocated tbl8 group. */
+ return group_idx;
+ }
+ }
+
+ /* If there are no tbl8 groups free then return error. */
+ return -ENOSPC;
+}
+
+static void
+tbl8_free(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
+{
+ /* Set tbl8 group invalid*/
+ struct rte_lpm_tbl_entry zero_tbl8_entry = {0};
+
+ __atomic_store(&tbl8[tbl8_group_start], &zero_tbl8_entry,
+ __ATOMIC_RELAXED);
+}
+
+static __rte_noinline int32_t
+add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+ uint32_t next_hop)
+{
+#define group_idx next_hop
+ uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
+
+ /* Calculate the index into Table24. */
+ tbl24_index = ip >> 8;
+ tbl24_range = depth_to_range(depth);
+
+ for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+ /*
+ * For invalid OR valid and non-extended tbl 24 entries set
+ * entry.
+ */
+ if (!lpm->tbl24[i].valid || (lpm->tbl24[i].valid_group == 0 &&
+ lpm->tbl24[i].depth <= depth)) {
+
+ struct rte_lpm_tbl_entry new_tbl24_entry = {
+ .next_hop = next_hop,
+ .valid = VALID,
+ .valid_group = 0,
+ .depth = depth,
+ };
+
+ /* Setting tbl24 entry in one go to avoid race
+ * conditions
+ */
+ __atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
+ __ATOMIC_RELEASE);
+
+ continue;
+ }
+
+ if (lpm->tbl24[i].valid_group == 1) {
+ /* If tbl24 entry is valid and extended calculate the
+ * index into tbl8.
+ */
+ tbl8_index = lpm->tbl24[i].group_idx *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ tbl8_group_end = tbl8_index +
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+ for (j = tbl8_index; j < tbl8_group_end; j++) {
+ if (!lpm->tbl8[j].valid ||
+ lpm->tbl8[j].depth <= depth) {
+ struct rte_lpm_tbl_entry
+ new_tbl8_entry = {
+ .valid = VALID,
+ .valid_group = VALID,
+ .depth = depth,
+ .next_hop = next_hop,
+ };
+
+ /*
+ * Setting tbl8 entry in one go to avoid
+ * race conditions
+ */
+ __atomic_store(&lpm->tbl8[j],
+ &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+
+ continue;
+ }
+ }
+ }
+ }
+#undef group_idx
+ return 0;
+}
+
+static __rte_noinline int32_t
+add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
+ uint32_t next_hop)
+{
+#define group_idx next_hop
+ uint32_t tbl24_index;
+ int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
+ tbl8_range, i;
+
+ tbl24_index = (ip_masked >> 8);
+ tbl8_range = depth_to_range(depth);
+
+ if (!lpm->tbl24[tbl24_index].valid) {
+ /* Search for a free tbl8 group. */
+ tbl8_group_index = tbl8_alloc(lpm->tbl8, lpm->number_tbl8s);
+
+ /* Check tbl8 allocation was successful. */
+ if (tbl8_group_index < 0) {
+ return tbl8_group_index;
+ }
+
+ /* Find index into tbl8 and range. */
+ tbl8_index = (tbl8_group_index *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES) +
+ (ip_masked & 0xFF);
+
+ /* Set tbl8 entry. */
+ for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .depth = depth,
+ .valid_group = lpm->tbl8[i].valid_group,
+ .next_hop = next_hop,
+ };
+ __atomic_store(&lpm->tbl8[i], &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+ }
+
+ /*
+ * Update tbl24 entry to point to new tbl8 entry. Note: The
+ * ext_flag and tbl8_index need to be updated simultaneously,
+ * so assign whole structure in one go
+ */
+
+ struct rte_lpm_tbl_entry new_tbl24_entry = {
+ .group_idx = tbl8_group_index,
+ .valid = VALID,
+ .valid_group = 1,
+ .depth = 0,
+ };
+
+ /* The tbl24 entry must be written only after the
+ * tbl8 entries are written.
+ */
+ __atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
+ __ATOMIC_RELEASE);
+
+	} else if (lpm->tbl24[tbl24_index].valid_group == 0) {
+		/* Valid but not extended entry: search for a free tbl8 group. */
+ tbl8_group_index = tbl8_alloc(lpm->tbl8, lpm->number_tbl8s);
+
+ if (tbl8_group_index < 0) {
+ return tbl8_group_index;
+ }
+
+ tbl8_group_start = tbl8_group_index *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ tbl8_group_end = tbl8_group_start +
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+ /* Populate new tbl8 with tbl24 value. */
+ for (i = tbl8_group_start; i < tbl8_group_end; i++) {
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .depth = lpm->tbl24[tbl24_index].depth,
+ .valid_group = lpm->tbl8[i].valid_group,
+ .next_hop = lpm->tbl24[tbl24_index].next_hop,
+ };
+ __atomic_store(&lpm->tbl8[i], &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+ }
+
+ tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+ /* Insert new rule into the tbl8 entry. */
+ for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) {
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .depth = depth,
+ .valid_group = lpm->tbl8[i].valid_group,
+ .next_hop = next_hop,
+ };
+ __atomic_store(&lpm->tbl8[i], &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+ }
+
+ /*
+ * Update tbl24 entry to point to new tbl8 entry. Note: The
+ * ext_flag and tbl8_index need to be updated simultaneously,
+ * so assign whole structure in one go.
+ */
+
+ struct rte_lpm_tbl_entry new_tbl24_entry = {
+ .group_idx = tbl8_group_index,
+ .valid = VALID,
+ .valid_group = 1,
+ .depth = 0,
+ };
+
+ /* The tbl24 entry must be written only after the
+ * tbl8 entries are written.
+ */
+ __atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
+ __ATOMIC_RELEASE);
+
+	} else {
+		/* Valid and extended entry: calculate the index into tbl8. */
+ tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+ tbl8_group_start = tbl8_group_index *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+
+ for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+
+ if (!lpm->tbl8[i].valid ||
+ lpm->tbl8[i].depth <= depth) {
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .depth = depth,
+ .next_hop = next_hop,
+ .valid_group = lpm->tbl8[i].valid_group,
+ };
+
+ /*
+ * Setting tbl8 entry in one go to avoid race
+ * condition
+ */
+ __atomic_store(&lpm->tbl8[i], &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+
+ continue;
+ }
+ }
+ }
+#undef group_idx
+ return 0;
+}
+
+/*
+ * Add a route
+ */
+int
+rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+ uint32_t next_hop)
+{
+ int32_t status = 0;
+ uint32_t ip_masked;
+
+ /* Check user arguments. */
+ if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+ return -EINVAL;
+
+ ip_masked = ip & depth_to_mask(depth);
+
+#if 0
+ /* Add the rule to the rule table. */
+ rule_index = rule_add(lpm, ip_masked, depth, next_hop);
+
+	/* Skip table entries update if the rule is the same as
+ * the rule in the rules table.
+ */
+ if (rule_index == -EEXIST)
+ return 0;
+
+	/* If there is no space available for the new rule return an error. */
+ if (rule_index < 0) {
+ return rule_index;
+ }
+#endif
+
+ if (depth <= MAX_DEPTH_TBL24) {
+ status = add_depth_small(lpm, ip_masked, depth, next_hop);
+	} else { /* If depth > MAX_DEPTH_TBL24 */
+ status = add_depth_big(lpm, ip_masked, depth, next_hop);
+
+ /*
+ * If add fails due to exhaustion of tbl8 extensions delete
+ * rule that was added to rule table.
+ */
+ if (status < 0) {
+ //rule_delete(lpm, rule_index, depth);
+
+ return status;
+ }
+ }
+
+ return 0;
+}
+
+#if 0
+/*
+ * Look for a rule in the high-level rules table
+ */
+int
+rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+uint32_t *next_hop)
+{
+ uint32_t ip_masked;
+ int32_t rule_index;
+
+ /* Check user arguments. */
+ if ((lpm == NULL) ||
+ (next_hop == NULL) ||
+ (depth < 1) || (depth > RTE_LPM_MAX_DEPTH))
+ return -EINVAL;
+
+ /* Look for the rule using rule_find. */
+ ip_masked = ip & depth_to_mask(depth);
+ rule_index = rule_find(lpm, ip_masked, depth);
+
+ if (rule_index >= 0) {
+ *next_hop = lpm->rules_tbl[rule_index].next_hop;
+ return 1;
+ }
+
+ /* If rule is not found return 0. */
+ return 0;
+}
+
+static int32_t
+find_previous_rule(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+ uint8_t *sub_rule_depth)
+{
+ int32_t rule_index;
+ uint32_t ip_masked;
+ uint8_t prev_depth;
+
+ for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) {
+ ip_masked = ip & depth_to_mask(prev_depth);
+
+ rule_index = rule_find(lpm, ip_masked, prev_depth);
+
+ if (rule_index >= 0) {
+ *sub_rule_depth = prev_depth;
+ return rule_index;
+ }
+ }
+
+ return -1;
+}
+#endif
+
+static int32_t
+delete_depth_small(struct rte_lpm *lpm, uint32_t ip_masked,
+ uint8_t depth, uint32_t sub_rule_nhop, uint8_t sub_rule_depth)
+{
+#define group_idx next_hop
+ uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j;
+
+ /* Calculate the range and index into Table24. */
+ tbl24_range = depth_to_range(depth);
+ tbl24_index = (ip_masked >> 8);
+ struct rte_lpm_tbl_entry zero_tbl24_entry = {0};
+
+ /*
+	 * Firstly check sub_rule_nhop. A value of zero indicates that there
+	 * is no replacement rule; a non-zero value is the replacement nexthop.
+ */
+ if (sub_rule_nhop == 0) {
+ /*
+ * If no replacement rule exists then invalidate entries
+ * associated with this rule.
+ */
+ for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+ if (lpm->tbl24[i].valid_group == 0 &&
+ lpm->tbl24[i].depth <= depth) {
+ __atomic_store(&lpm->tbl24[i],
+ &zero_tbl24_entry, __ATOMIC_RELEASE);
+ } else if (lpm->tbl24[i].valid_group == 1) {
+ /*
+ * If TBL24 entry is extended, then there has
+ * to be a rule with depth >= 25 in the
+ * associated TBL8 group.
+ */
+
+ tbl8_group_index = lpm->tbl24[i].group_idx;
+ tbl8_index = tbl8_group_index *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+ for (j = tbl8_index; j < (tbl8_index +
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+ if (lpm->tbl8[j].depth <= depth)
+ lpm->tbl8[j].valid = INVALID;
+ }
+ }
+ }
+ } else {
+ /*
+ * If a replacement rule exists then modify entries
+ * associated with this rule.
+ */
+
+ struct rte_lpm_tbl_entry new_tbl24_entry = {
+ .next_hop = sub_rule_nhop,
+ .valid = VALID,
+ .valid_group = 0,
+ .depth = sub_rule_depth,
+ };
+
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .valid_group = VALID,
+ .depth = sub_rule_depth,
+ .next_hop = sub_rule_nhop,
+ };
+
+ for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
+
+ if (lpm->tbl24[i].valid_group == 0 &&
+ lpm->tbl24[i].depth <= depth) {
+ __atomic_store(&lpm->tbl24[i], &new_tbl24_entry,
+ __ATOMIC_RELEASE);
+ } else if (lpm->tbl24[i].valid_group == 1) {
+ /*
+ * If TBL24 entry is extended, then there has
+ * to be a rule with depth >= 25 in the
+ * associated TBL8 group.
+ */
+
+ tbl8_group_index = lpm->tbl24[i].group_idx;
+ tbl8_index = tbl8_group_index *
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+ for (j = tbl8_index; j < (tbl8_index +
+ RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) {
+
+ if (lpm->tbl8[j].depth <= depth)
+ __atomic_store(&lpm->tbl8[j],
+ &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+ }
+ }
+ }
+ }
+#undef group_idx
+ return 0;
+}
+
+/*
+ * Checks if table 8 group can be recycled.
+ *
+ * Return of -EEXIST means tbl8 is in use and thus can not be recycled.
+ * Return of -EINVAL means tbl8 is empty and thus can be recycled.
+ * Return of a value > -1 means tbl8 is in use but all entries share the same
+ * value, and thus it can be recycled.
+ */
+static int32_t
+tbl8_recycle_check(struct rte_lpm_tbl_entry *tbl8,
+ uint32_t tbl8_group_start)
+{
+ uint32_t tbl8_group_end, i;
+ tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+
+ /*
+ * Check the first entry of the given tbl8. If it is invalid we know
+ * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH
+ * (As they would affect all entries in a tbl8) and thus this table
+ * can not be recycled.
+ */
+ if (tbl8[tbl8_group_start].valid) {
+ /*
+ * If first entry is valid check if the depth is less than 24
+ * and if so check the rest of the entries to verify that they
+ * are all of this depth.
+ */
+ if (tbl8[tbl8_group_start].depth <= MAX_DEPTH_TBL24) {
+ for (i = (tbl8_group_start + 1); i < tbl8_group_end;
+ i++) {
+
+ if (tbl8[i].depth !=
+ tbl8[tbl8_group_start].depth) {
+
+ return -EEXIST;
+ }
+ }
+			/* If all entries are the same return the tbl8 index */
+ return tbl8_group_start;
+ }
+
+ return -EEXIST;
+ }
+ /*
+ * If the first entry is invalid check if the rest of the entries in
+ * the tbl8 are invalid.
+ */
+ for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) {
+ if (tbl8[i].valid)
+ return -EEXIST;
+ }
+ /* If no valid entries are found then return -EINVAL. */
+ return -EINVAL;
+}
+
+static int32_t
+delete_depth_big(struct rte_lpm *lpm, uint32_t ip_masked,
+ uint8_t depth, uint32_t sub_rule_nhop, uint8_t sub_rule_depth)
+{
+#define group_idx next_hop
+ uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index,
+ tbl8_range, i;
+ int32_t tbl8_recycle_index;
+
+ /*
+ * Calculate the index into tbl24 and range. Note: All depths larger
+ * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry.
+ */
+ tbl24_index = ip_masked >> 8;
+
+ /* Calculate the index into tbl8 and range. */
+ tbl8_group_index = lpm->tbl24[tbl24_index].group_idx;
+ tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+ tbl8_index = tbl8_group_start + (ip_masked & 0xFF);
+ tbl8_range = depth_to_range(depth);
+
+ if (sub_rule_nhop == 0) {
+ /*
+ * Loop through the range of entries on tbl8 for which the
+ * rule_to_delete must be removed or modified.
+ */
+ for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+ if (lpm->tbl8[i].depth <= depth)
+ lpm->tbl8[i].valid = INVALID;
+ }
+ } else {
+ /* Set new tbl8 entry. */
+ struct rte_lpm_tbl_entry new_tbl8_entry = {
+ .valid = VALID,
+ .depth = sub_rule_depth,
+ .valid_group = lpm->tbl8[tbl8_group_start].valid_group,
+ .next_hop = sub_rule_nhop,
+ };
+
+ /*
+ * Loop through the range of entries on tbl8 for which the
+ * rule_to_delete must be modified.
+ */
+ for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) {
+ if (lpm->tbl8[i].depth <= depth)
+ __atomic_store(&lpm->tbl8[i], &new_tbl8_entry,
+ __ATOMIC_RELAXED);
+ }
+ }
+
+ /*
+ * Check if there are any valid entries in this tbl8 group. If all
+ * tbl8 entries are invalid we can free the tbl8 and invalidate the
+ * associated tbl24 entry.
+ */
+
+ tbl8_recycle_index = tbl8_recycle_check(lpm->tbl8, tbl8_group_start);
+
+ if (tbl8_recycle_index == -EINVAL) {
+ /* Set tbl24 before freeing tbl8 to avoid race condition.
+ * Prevent the free of the tbl8 group from hoisting.
+ */
+ lpm->tbl24[tbl24_index].valid = 0;
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+ tbl8_free(lpm->tbl8, tbl8_group_start);
+ } else if (tbl8_recycle_index > -1) {
+ /* Update tbl24 entry. */
+ struct rte_lpm_tbl_entry new_tbl24_entry = {
+ .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop,
+ .valid = VALID,
+ .valid_group = 0,
+ .depth = lpm->tbl8[tbl8_recycle_index].depth,
+ };
+
+ /* Set tbl24 before freeing tbl8 to avoid race condition.
+ * Prevent the free of the tbl8 group from hoisting.
+ */
+ __atomic_store(&lpm->tbl24[tbl24_index], &new_tbl24_entry,
+ __ATOMIC_RELAXED);
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+ tbl8_free(lpm->tbl8, tbl8_group_start);
+ }
+#undef group_idx
+ return 0;
+}
+
+/*
+ * Deletes a rule
+ */
+int
+rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
+ uint8_t sub_rule_depth, uint32_t sub_rule_nhop)
+{
+ //int32_t rule_to_delete_index;
+ uint32_t ip_masked;
+ //uint8_t sub_rule_depth;
+ /*
+ * Check input arguments. Note: ip is an unsigned 32-bit integer,
+ * so it cannot be out of range and need not be checked.
+ */
+ if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) {
+ return -EINVAL;
+ }
+
+ ip_masked = ip & depth_to_mask(depth);
+
+#if 0
+ /*
+ * Find the index of the input rule, that needs to be deleted, in the
+ * rule table.
+ */
+ rule_to_delete_index = rule_find(lpm, ip_masked, depth);
+
+ /*
+ * Check if rule_to_delete_index was found. If no rule was found the
+ * function rule_find returns -EINVAL.
+ */
+ if (rule_to_delete_index < 0)
+ return -EINVAL;
+
+ /* Delete the rule from the rule table. */
+ rule_delete(lpm, rule_to_delete_index, depth);
+#endif
+
+ /*
+ * Find rule to replace the rule_to_delete. If there is no rule to
+ * replace the rule_to_delete we return -1 and invalidate the table
+ * entries associated with this rule.
+ */
+ //sub_rule_depth = *psub_rule_depth;
+ //sub_rule_index = find_previous_rule(lpm, ip, depth, &sub_rule_depth);
+
+ /*
+ * If the input depth value is less than 25 use function
+ * delete_depth_small otherwise use delete_depth_big.
+ */
+ if (depth <= MAX_DEPTH_TBL24) {
+ return delete_depth_small(lpm, ip_masked, depth,
+ sub_rule_nhop, sub_rule_depth);
+ } else { /* If depth > MAX_DEPTH_TBL24 */
+ return delete_depth_big(lpm, ip_masked, depth, sub_rule_nhop,
+ sub_rule_depth);
+ }
+}
+
+/*
+ * Delete all rules from the LPM table.
+ */
+void
+rte_lpm_delete_all(struct rte_lpm *lpm)
+{
+ /* Zero rule information. */
+ memset(lpm->rule_info, 0, sizeof(lpm->rule_info));
+
+ /* Zero tbl24. */
+ memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+
+ /* Zero tbl8. */
+ memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
+ * RTE_LPM_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+ /* Delete all rules from the rules table. */
+ memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules);
+}
Index: sys/contrib/dpdk_rte_lpm/rte_lpm6.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_lpm6.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+#ifndef _RTE_LPM6_H_
+#define _RTE_LPM6_H_
+
+/**
+ * @file
+ * RTE Longest Prefix Match for IPv6 (LPM6)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define RTE_LPM6_MAX_DEPTH 128
+#define RTE_LPM6_IPV6_ADDR_SIZE 16
+/** Max number of characters in LPM name. */
+#define RTE_LPM6_NAMESIZE 32
+
+/** LPM structure. */
+struct rte_lpm6;
+
+struct nhop_object;
+struct rte_lpm6_external {
+ struct nhop_object **nh_idx; /**< # -> idx mappings */
+ uint32_t default_idx; /* nhop index of default route */
+ uint32_t fibnum; /* fib index */
+};
+
+/** LPM configuration structure. */
+struct rte_lpm6_config {
+ uint32_t max_rules; /**< Max number of rules. */
+ uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */
+ int flags; /**< This field is currently unused. */
+};
+
+#define RTE_LPM6_RULE_SIZE 32
+struct rte_lpm6_rule *fill_rule6(char *buffer, const uint8_t *ip,
+ uint8_t depth, uint32_t next_hop);
+/**
+ * Create an LPM object.
+ *
+ * @param name
+ * LPM object name
+ * @param socket_id
+ * NUMA socket ID for LPM table memory allocation
+ * @param config
+ * Structure containing the configuration
+ * @return
+ * Handle to LPM object on success, NULL otherwise with rte_errno set
+ * to an appropriate value. Possible rte_errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - EINVAL - invalid parameter passed to function
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_lpm6 *
+rte_lpm6_create(const char *name, int socket_id,
+ const struct rte_lpm6_config *config);
+
+/**
+ * Find an existing LPM object and return a pointer to it.
+ *
+ * @param name
+ * Name of the lpm object as passed to rte_lpm6_create()
+ * @return
+ * Pointer to lpm object or NULL if object not found with rte_errno
+ * set appropriately. Possible rte_errno values include:
+ * - ENOENT - required entry not available to return.
+ */
+struct rte_lpm6 *
+rte_lpm6_find_existing(const char *name);
+
+/**
+ * Free an LPM object.
+ *
+ * @param lpm
+ * LPM object handle
+ * @return
+ * None
+ */
+void
+rte_lpm6_free(struct rte_lpm6 *lpm);
+
+/**
+ * Add a rule to the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be added to the LPM table
+ * @param depth
+ * Depth of the rule to be added to the LPM table
+ * @param next_hop
+ * Next hop of the rule to be added to the LPM table
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_lpm6_add(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ uint32_t next_hop, int is_new_rule);
+
+/**
+ * Check if a rule is present in the LPM table,
+ * and provide its next hop if it is.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be searched
+ * @param depth
+ * Depth of the rule to be searched
+ * @param next_hop
+ * Next hop of the rule (valid only if it is found)
+ * @return
+ * 1 if the rule exists, 0 if it does not, a negative value on failure
+ */
+int
+rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ uint32_t *next_hop);
+
+/**
+ * Delete a rule from the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP of the rule to be deleted from the LPM table
+ * @param depth
+ * Depth of the rule to be deleted from the LPM table
+ * @return
+ * 0 on success, negative value otherwise
+ */
+int
+rte_lpm6_delete(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ struct rte_lpm6_rule *lsp_rule);
+
+/**
+ * Delete a group of rules from the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ips
+ * Array of IPs to be deleted from the LPM table
+ * @param depths
+ * Array of depths of the rules to be deleted from the LPM table
+ * @param n
+ * Number of rules to be deleted from the LPM table
+ * @return
+ * 0 on success, negative value otherwise.
+ */
+int
+rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
+ uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n);
+
+/**
+ * Delete all rules from the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ */
+void
+rte_lpm6_delete_all(struct rte_lpm6 *lpm);
+
+/**
+ * Look up an IP in the LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ip
+ * IP to be looked up in the LPM table
+ * @param next_hop
+ * Next hop of the most specific rule found for IP (valid on lookup hit only)
+ * @return
+ * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
+ */
+int
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, const uint8_t *ip, uint32_t *next_hop);
+
+/**
+ * Look up multiple IP addresses in an LPM table.
+ *
+ * @param lpm
+ * LPM object handle
+ * @param ips
+ * Array of IPs to be looked up in the LPM table
+ * @param next_hops
+ * Next hop of the most specific rule found for IP (valid on lookup hit only).
+ * This is an array of four-byte values. The next hop will be stored at
+ * each position on success; otherwise the position will be set to -1.
+ * @param n
+ * Number of elements in ips (and next_hops) array to lookup.
+ * @return
+ * -EINVAL for incorrect arguments, otherwise 0
+ */
+int
+rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
+ uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
+ int32_t *next_hops, unsigned int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Index: sys/contrib/dpdk_rte_lpm/rte_lpm6.c
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_lpm6.c
@@ -0,0 +1,1415 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <sys/param.h>
+#include <sys/ctype.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+//#include <netinet6/rte_tailq.h>
+int errno = 0, rte_errno = 0;
+
+#include "rte_shim.h"
+#include "rte_lpm6.h"
+
+#define RTE_LPM6_TBL24_NUM_ENTRIES (1 << 24)
+#define RTE_LPM6_TBL8_GROUP_NUM_ENTRIES 256
+#define RTE_LPM6_TBL8_MAX_NUM_GROUPS (1 << 21)
+
+#define RTE_LPM6_VALID_EXT_ENTRY_BITMASK 0xA0000000
+#define RTE_LPM6_LOOKUP_SUCCESS 0x20000000
+#define RTE_LPM6_TBL8_BITMASK 0x001FFFFF
+
+#define ADD_FIRST_BYTE 3
+#define LOOKUP_FIRST_BYTE 4
+#define BYTE_SIZE 8
+#define BYTES2_SIZE 16
+
+#define RULE_HASH_TABLE_EXTRA_SPACE 64
+#define TBL24_IND UINT32_MAX
+
+#define lpm6_tbl8_gindex next_hop
+
+/** Flags for setting an entry as valid/invalid. */
+enum valid_flag {
+ INVALID = 0,
+ VALID
+};
+
+#if 0
+TAILQ_HEAD(rte_lpm6_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_lpm6_tailq = {
+ .name = "RTE_LPM6",
+};
+EAL_REGISTER_TAILQ(rte_lpm6_tailq)
+#endif
+
+/** Tbl entry structure. It is the same for both tbl24 and tbl8 */
+struct rte_lpm6_tbl_entry {
+ uint32_t next_hop: 21; /**< Next hop / next table to be checked. */
+ uint32_t depth :8; /**< Rule depth. */
+
+ /* Flags. */
+ uint32_t valid :1; /**< Validation flag. */
+ uint32_t valid_group :1; /**< Group validation flag. */
+ uint32_t ext_entry :1; /**< External entry. */
+};
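+
+/*
+ * How the lookup masks relate to the bitfields above (assuming the
+ * usual little-endian bitfield layout, with next_hop in the low bits):
+ * the fast path reads a whole entry as one uint32_t, so
+ *
+ *   next_hop    bits  0..20 -> RTE_LPM6_TBL8_BITMASK (0x001FFFFF)
+ *   depth       bits 21..28
+ *   valid       bit  29     -> RTE_LPM6_LOOKUP_SUCCESS (0x20000000)
+ *   valid_group bit  30
+ *   ext_entry   bit  31     -> valid|ext_entry ==
+ *                              RTE_LPM6_VALID_EXT_ENTRY_BITMASK (0xA0000000)
+ */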
+
+/** Rules tbl entry structure. */
+struct rte_lpm6_rule {
+ uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+ uint32_t next_hop; /**< Rule next hop. */
+ uint8_t depth; /**< Rule depth. */
+};
+
+/** Rules tbl entry key. */
+struct rte_lpm6_rule_key {
+ uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+ uint8_t depth; /**< Rule depth. */
+};
+
+/* Header of tbl8 */
+struct rte_lpm_tbl8_hdr {
+ uint32_t owner_tbl_ind; /**< owner table: TBL24_IND if owner is tbl24,
+ * otherwise index of tbl8
+ */
+ uint32_t owner_entry_ind; /**< index of the owner table entry where
+ * pointer to the tbl8 is stored
+ */
+ uint32_t ref_cnt; /**< table reference counter */
+};
+
+/** LPM6 structure. */
+struct rte_lpm6 {
+ struct rte_lpm6_external ext; /* Storage used by the algo wrapper */
+ /* LPM metadata. */
+ char name[RTE_LPM6_NAMESIZE]; /**< Name of the lpm. */
+ uint32_t max_rules; /**< Max number of rules. */
+ uint32_t used_rules; /**< Used rules so far. */
+ uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */
+
+ /* LPM Tables. */
+ //struct rte_hash *rules_tbl; /**< LPM rules. */
+ struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
+ __rte_cache_aligned; /**< LPM tbl24 table. */
+
+ uint32_t *tbl8_pool; /**< pool of indexes of free tbl8s */
+ uint32_t tbl8_pool_pos; /**< current position in the tbl8 pool */
+
+ struct rte_lpm_tbl8_hdr *tbl8_hdrs; /* array of tbl8 headers */
+
+ struct rte_lpm6_tbl_entry tbl8[0]
+ __rte_cache_aligned; /**< LPM tbl8 table. */
+};
+
+/*
+ * Takes an array of uint8_t (IPv6 address) and masks it using the depth.
+ * It leaves the depth most significant bits untouched
+ * and sets the rest to 0.
+ */
+static inline void
+ip6_mask_addr(uint8_t *ip, uint8_t depth)
+{
+ int16_t part_depth, mask;
+ int i;
+
+ part_depth = depth;
+
+ for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
+ if (part_depth < BYTE_SIZE && part_depth >= 0) {
+ mask = (uint16_t)(~(UINT8_MAX >> part_depth));
+ ip[i] = (uint8_t)(ip[i] & mask);
+ } else if (part_depth < 0)
+ ip[i] = 0;
+
+ part_depth -= BYTE_SIZE;
+ }
+}
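+
+/*
+ * A worked example of the masking above: depth 20 leaves the first two
+ * bytes intact, masks the third with ~(UINT8_MAX >> 4) == 0xF0 and
+ * zeroes the rest:
+ *
+ *   uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE] =
+ *       { 0x20, 0x01, 0x0d, 0xb8, [15] = 0x01 };
+ *   ip6_mask_addr(ip, 20);
+ *   // ip is now { 0x20, 0x01, 0x00, 0x00, ... }
+ */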
+
+/* copy ipv6 address */
+static inline void
+ip6_copy_addr(uint8_t *dst, const uint8_t *src)
+{
+ rte_memcpy(dst, src, RTE_LPM6_IPV6_ADDR_SIZE);
+}
+
+#if 0
+/*
+ * LPM6 rule hash function
+ *
+ * It's used as a hash function for the rte_hash
+ * containing rules
+ */
+static inline uint32_t
+rule_hash(const void *data, __rte_unused uint32_t data_len,
+ uint32_t init_val)
+{
+ return rte_jhash(data, sizeof(struct rte_lpm6_rule_key), init_val);
+}
+#endif
+
+/*
+ * Init pool of free tbl8 indexes
+ */
+static void
+tbl8_pool_init(struct rte_lpm6 *lpm)
+{
+ uint32_t i;
+
+ /* put entire range of indexes to the tbl8 pool */
+ for (i = 0; i < lpm->number_tbl8s; i++)
+ lpm->tbl8_pool[i] = i;
+
+ lpm->tbl8_pool_pos = 0;
+}
+
+/*
+ * Get an index of a free tbl8 from the pool
+ */
+static inline int
+tbl8_get(struct rte_lpm6 *lpm, uint32_t *tbl8_ind)
+{
+ if (lpm->tbl8_pool_pos == lpm->number_tbl8s)
+ /* no more free tbl8 */
+ return -ENOSPC;
+
+ /* next index */
+ *tbl8_ind = lpm->tbl8_pool[lpm->tbl8_pool_pos++];
+ return 0;
+}
+
+/*
+ * Put an index of a free tbl8 back to the pool
+ */
+static inline int
+tbl8_put(struct rte_lpm6 *lpm, uint32_t tbl8_ind)
+{
+ if (lpm->tbl8_pool_pos == 0)
+ /* pool is full */
+ return -ENOSPC;
+
+ lpm->tbl8_pool[--lpm->tbl8_pool_pos] = tbl8_ind;
+ return 0;
+}
+
+/*
+ * Returns number of tbl8s available in the pool
+ */
+static inline uint32_t
+tbl8_available(struct rte_lpm6 *lpm)
+{
+ return lpm->number_tbl8s - lpm->tbl8_pool_pos;
+}
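+
+/*
+ * The three helpers above implement the tbl8 allocator as a LIFO stack
+ * of free group indexes: tbl8_pool_pos is the stack pointer, every
+ * index at or beyond it is free and every index before it is in use,
+ * so no per-group free bitmap is needed.
+ */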
+
+#if 0
+/*
+ * Init a rule key.
+ * note that ip must be already masked
+ */
+static inline void
+rule_key_init(struct rte_lpm6_rule_key *key, uint8_t *ip, uint8_t depth)
+{
+ ip6_copy_addr(key->ip, ip);
+ key->depth = depth;
+}
+
+/*
+ * Rebuild the entire LPM tree by reinserting all rules
+ */
+static void
+rebuild_lpm(struct rte_lpm6 *lpm)
+{
+ uint64_t next_hop;
+ struct rte_lpm6_rule_key *rule_key;
+ uint32_t iter = 0;
+
+ while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key,
+ (void **) &next_hop, &iter) >= 0)
+ rte_lpm6_add(lpm, rule_key->ip, rule_key->depth,
+ (uint32_t) next_hop);
+}
+#endif
+
+/*
+ * Allocates memory for LPM object
+ */
+struct rte_lpm6 *
+rte_lpm6_create(const char *name, int socket_id,
+ const struct rte_lpm6_config *config)
+{
+ char mem_name[RTE_LPM6_NAMESIZE];
+ struct rte_lpm6 *lpm = NULL;
+ //struct rte_tailq_entry *te;
+ uint64_t mem_size;
+ //struct rte_lpm6_list *lpm_list;
+ //struct rte_hash *rules_tbl = NULL;
+ uint32_t *tbl8_pool = NULL;
+ struct rte_lpm_tbl8_hdr *tbl8_hdrs = NULL;
+
+ //lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+ RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t));
+
+ /* Check user arguments. */
+ if ((name == NULL) || (socket_id < -1) || (config == NULL) ||
+ config->number_tbl8s > RTE_LPM6_TBL8_MAX_NUM_GROUPS) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+#if 0
+ /* create rules hash table */
+ snprintf(mem_name, sizeof(mem_name), "LRH_%s", name);
+ struct rte_hash_parameters rule_hash_tbl_params = {
+ .entries = config->max_rules * 1.2 +
+ RULE_HASH_TABLE_EXTRA_SPACE,
+ .key_len = sizeof(struct rte_lpm6_rule_key),
+ .hash_func = rule_hash,
+ .hash_func_init_val = 0,
+ .name = mem_name,
+ .reserved = 0,
+ .socket_id = socket_id,
+ .extra_flag = 0
+ };
+
+ rules_tbl = rte_hash_create(&rule_hash_tbl_params);
+ if (rules_tbl == NULL) {
+ RTE_LOG(ERR, LPM, "LPM rules hash table allocation failed: %s (%d)",
+ rte_strerror(rte_errno), rte_errno);
+ goto fail_wo_unlock;
+ }
+#endif
+
+ /* allocate tbl8 indexes pool */
+ tbl8_pool = rte_malloc(NULL,
+ sizeof(uint32_t) * config->number_tbl8s,
+ RTE_CACHE_LINE_SIZE);
+ if (tbl8_pool == NULL) {
+ RTE_LOG(ERR, LPM, "LPM tbl8 pool allocation failed: %s (%d)",
+ rte_strerror(rte_errno), rte_errno);
+ rte_errno = ENOMEM;
+ goto fail_wo_unlock;
+ }
+
+ /* allocate tbl8 headers */
+ tbl8_hdrs = rte_malloc(NULL,
+ sizeof(struct rte_lpm_tbl8_hdr) * config->number_tbl8s,
+ RTE_CACHE_LINE_SIZE);
+ if (tbl8_hdrs == NULL) {
+ RTE_LOG(ERR, LPM, "LPM tbl8 headers allocation failed: %s (%d)",
+ rte_strerror(rte_errno), rte_errno);
+ rte_errno = ENOMEM;
+ goto fail_wo_unlock;
+ }
+
+ snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
+
+ /* Determine the amount of memory to allocate. */
+ mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
+
+#if 0
+ rte_mcfg_tailq_write_lock();
+
+ /* Guarantee there's no existing */
+ TAILQ_FOREACH(te, lpm_list, next) {
+ lpm = (struct rte_lpm6 *) te->data;
+ if (strncmp(name, lpm->name, RTE_LPM6_NAMESIZE) == 0)
+ break;
+ }
+ lpm = NULL;
+ if (te != NULL) {
+ rte_errno = EEXIST;
+ goto fail;
+ }
+
+ /* allocate tailq entry */
+ te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+#endif
+
+ /* Allocate memory to store the LPM data structures. */
+ lpm = rte_zmalloc_socket(mem_name, (size_t)mem_size,
+ RTE_CACHE_LINE_SIZE, socket_id);
+
+ if (lpm == NULL) {
+ RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
+ //rte_free(te);
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* Save user arguments. */
+ //lpm->max_rules = config->max_rules;
+ lpm->number_tbl8s = config->number_tbl8s;
+ strlcpy(lpm->name, name, sizeof(lpm->name));
+ //lpm->rules_tbl = rules_tbl;
+ lpm->tbl8_pool = tbl8_pool;
+ lpm->tbl8_hdrs = tbl8_hdrs;
+
+ /* init the stack */
+ tbl8_pool_init(lpm);
+
+ //te->data = (void *) lpm;
+
+ //TAILQ_INSERT_TAIL(lpm_list, te, next);
+ rte_mcfg_tailq_write_unlock();
+ return lpm;
+
+fail:
+ rte_mcfg_tailq_write_unlock();
+
+fail_wo_unlock:
+ rte_free(tbl8_hdrs);
+ rte_free(tbl8_pool);
+ //rte_hash_free(rules_tbl);
+
+ return NULL;
+}
+
+#if 0
+/*
+ * Find an existing lpm table and return a pointer to it.
+ */
+struct rte_lpm6 *
+rte_lpm6_find_existing(const char *name)
+{
+ struct rte_lpm6 *l = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_lpm6_list *lpm_list;
+
+ lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+ rte_mcfg_tailq_read_lock();
+ TAILQ_FOREACH(te, lpm_list, next) {
+ l = (struct rte_lpm6 *) te->data;
+ if (strncmp(name, l->name, RTE_LPM6_NAMESIZE) == 0)
+ break;
+ }
+ rte_mcfg_tailq_read_unlock();
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ return l;
+}
+#endif
+
+/*
+ * Deallocates memory for given LPM table.
+ */
+void
+rte_lpm6_free(struct rte_lpm6 *lpm)
+{
+#if 0
+ struct rte_lpm6_list *lpm_list;
+ struct rte_tailq_entry *te;
+
+ /* Check user arguments. */
+ if (lpm == NULL)
+ return;
+
+ lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
+
+ rte_mcfg_tailq_write_lock();
+
+ /* find our tailq entry */
+ TAILQ_FOREACH(te, lpm_list, next) {
+ if (te->data == (void *) lpm)
+ break;
+ }
+
+ if (te != NULL)
+ TAILQ_REMOVE(lpm_list, te, next);
+
+ rte_mcfg_tailq_write_unlock();
+#endif
+
+ rte_free(lpm->tbl8_hdrs);
+ rte_free(lpm->tbl8_pool);
+ //rte_hash_free(lpm->rules_tbl);
+ rte_free(lpm);
+ //rte_free(te);
+}
+
+#if 0
+/* Find a rule */
+static inline int
+rule_find_with_key(struct rte_lpm6 *lpm,
+ const struct rte_lpm6_rule_key *rule_key,
+ uint32_t *next_hop)
+{
+ uint64_t hash_val;
+ int ret;
+
+ /* lookup for a rule */
+ ret = rte_hash_lookup_data(lpm->rules_tbl, (const void *) rule_key,
+ (void **) &hash_val);
+ if (ret >= 0) {
+ *next_hop = (uint32_t) hash_val;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Find a rule */
+static int
+rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+ uint32_t *next_hop)
+{
+ struct rte_lpm6_rule_key rule_key;
+
+ /* init a rule key */
+ rule_key_init(&rule_key, ip, depth);
+
+ return rule_find_with_key(lpm, &rule_key, next_hop);
+}
+
+/*
+ * Checks if a rule already exists in the rules table and updates
+ * the nexthop if so. Otherwise it adds a new rule if enough space is available.
+ *
+ * Returns:
+ * 0 - next hop of existing rule is updated
+ * 1 - new rule successfully added
+ * <0 - error
+ */
+static inline int
+rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop)
+{
+ int ret, rule_exist;
+ struct rte_lpm6_rule_key rule_key;
+ uint32_t unused;
+
+ /* init a rule key */
+ rule_key_init(&rule_key, ip, depth);
+
+ /* Scan through rule list to see if rule already exists. */
+ rule_exist = rule_find_with_key(lpm, &rule_key, &unused);
+
+ /*
+ * If rule does not exist check if there is space to add a new rule to
+ * this rule group. If there is no space return error.
+ */
+ if (!rule_exist && lpm->used_rules == lpm->max_rules)
+ return -ENOSPC;
+
+ /* add the rule or update rules next hop */
+ ret = rte_hash_add_key_data(lpm->rules_tbl, &rule_key,
+ (void *)(uintptr_t) next_hop);
+ if (ret < 0)
+ return ret;
+
+ /* Increment the used rules counter for this rule group. */
+ if (!rule_exist) {
+ lpm->used_rules++;
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
+/*
+ * Function that expands a rule across the data structure when a more
+ * specific one has been added before. It ensures that every possible
+ * combination of bits in the IP address returns a match.
+ */
+static void
+expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t old_depth,
+ uint8_t new_depth, uint32_t next_hop, uint8_t valid)
+{
+ uint32_t tbl8_group_end, tbl8_gindex_next, j;
+
+ tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+
+ struct rte_lpm6_tbl_entry new_tbl8_entry = {
+ .valid = valid,
+ .valid_group = valid,
+ .depth = new_depth,
+ .next_hop = next_hop,
+ .ext_entry = 0,
+ };
+
+ for (j = tbl8_gindex; j < tbl8_group_end; j++) {
+ if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0
+ && lpm->tbl8[j].depth <= old_depth)) {
+
+ lpm->tbl8[j] = new_tbl8_entry;
+
+ } else if (lpm->tbl8[j].ext_entry == 1) {
+
+ tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex
+ * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+ expand_rule(lpm, tbl8_gindex_next, old_depth, new_depth,
+ next_hop, valid);
+ }
+ }
+}
+
+/*
+ * Init a tbl8 header
+ */
+static inline void
+init_tbl8_header(struct rte_lpm6 *lpm, uint32_t tbl_ind,
+ uint32_t owner_tbl_ind, uint32_t owner_entry_ind)
+{
+ struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind];
+ tbl_hdr->owner_tbl_ind = owner_tbl_ind;
+ tbl_hdr->owner_entry_ind = owner_entry_ind;
+ tbl_hdr->ref_cnt = 0;
+}
+
+/*
+ * Calculate index to the table based on the number and position
+ * of the bytes being inspected in this step.
+ */
+static uint32_t
+get_bitshift(const uint8_t *ip, uint8_t first_byte, uint8_t bytes)
+{
+ uint32_t entry_ind, i;
+ int8_t bitshift;
+
+ entry_ind = 0;
+ for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) {
+ bitshift = (int8_t)((bytes - i)*BYTE_SIZE);
+
+ if (bitshift < 0)
+ bitshift = 0;
+ entry_ind = entry_ind | ip[i-1] << bitshift;
+ }
+
+ return entry_ind;
+}
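+
+/*
+ * A worked example: on the first step bytes == ADD_FIRST_BYTE (3) and
+ * first_byte == 1, so the loop folds the first three address bytes into
+ * a tbl24 index:
+ *
+ *   entry_ind = ip[0] << 16 | ip[1] << 8 | ip[2];
+ *
+ * Every later step passes bytes == 1, which degenerates to
+ * entry_ind = ip[first_byte - 1].
+ */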
+
+/*
+ * Simulate one step of adding a new route to the LPM, counting the
+ * number of new tables that will be needed.
+ *
+ * It returns 0 when the step is final, or 1 if
+ * the process needs to be continued by calling the function again.
+ */
+static inline int
+simulate_add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
+ struct rte_lpm6_tbl_entry **next_tbl, const uint8_t *ip,
+ uint8_t bytes, uint8_t first_byte, uint8_t depth,
+ uint32_t *need_tbl_nb)
+{
+ uint32_t entry_ind;
+ uint8_t bits_covered;
+ uint32_t next_tbl_ind;
+
+ /*
+ * Calculate index to the table based on the number and position
+ * of the bytes being inspected in this step.
+ */
+ entry_ind = get_bitshift(ip, first_byte, bytes);
+
+ /* Number of bits covered in this step */
+ bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE);
+
+ if (depth <= bits_covered) {
+ *need_tbl_nb = 0;
+ return 0;
+ }
+
+ if (tbl[entry_ind].valid == 0 || tbl[entry_ind].ext_entry == 0) {
+ /* from this point on a new table is needed on each level
+ * that is not covered yet
+ */
+ depth -= bits_covered;
+ uint32_t cnt = depth >> 3; /* depth / BYTE_SIZE */
+ if (depth & 7) /* 0b00000111 */
+ /* if depth % 8 > 0 then one more table is needed
+ * for those last bits
+ */
+ cnt++;
+
+ *need_tbl_nb = cnt;
+ return 0;
+ }
+
+ next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex;
+ *next_tbl = &(lpm->tbl8[next_tbl_ind *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
+ *need_tbl_nb = 0;
+ return 1;
+}
+
+/*
+ * Partially adds a new route to the data structure (tbl24+tbl8s).
+ * It returns 0 on success, a negative number on failure, or 1 if
+ * the process needs to be continued by calling the function again.
+ */
+static inline int
+add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
+ uint32_t tbl_ind, struct rte_lpm6_tbl_entry **next_tbl,
+ uint32_t *next_tbl_ind, uint8_t *ip, uint8_t bytes,
+ uint8_t first_byte, uint8_t depth, uint32_t next_hop,
+ uint8_t is_new_rule)
+{
+ uint32_t entry_ind, tbl_range, tbl8_group_start, tbl8_group_end, i;
+ uint32_t tbl8_gindex;
+ uint8_t bits_covered;
+ int ret;
+
+ /*
+ * Calculate index to the table based on the number and position
+ * of the bytes being inspected in this step.
+ */
+ entry_ind = get_bitshift(ip, first_byte, bytes);
+
+ /* Number of bits covered in this step */
+ bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE);
+
+ /*
+ * If depth is not larger than this number (i.e. this is the last step)
+ * expand the rule across the relevant positions in the table.
+ */
+ if (depth <= bits_covered) {
+ tbl_range = 1 << (bits_covered - depth);
+
+ for (i = entry_ind; i < (entry_ind + tbl_range); i++) {
+ if (!tbl[i].valid || (tbl[i].ext_entry == 0 &&
+ tbl[i].depth <= depth)) {
+
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .next_hop = next_hop,
+ .depth = depth,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 0,
+ };
+
+ tbl[i] = new_tbl_entry;
+
+ } else if (tbl[i].ext_entry == 1) {
+
+ /*
+ * If tbl entry is valid and extended calculate the index
+ * into next tbl8 and expand the rule across the data structure.
+ */
+ tbl8_gindex = tbl[i].lpm6_tbl8_gindex *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+ expand_rule(lpm, tbl8_gindex, depth, depth,
+ next_hop, VALID);
+ }
+ }
+
+ /* update tbl8 rule reference counter */
+ if (tbl_ind != TBL24_IND && is_new_rule)
+ lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
+
+ return 0;
+ }
+ /*
+ * If this is not the last step just fill one position
+ * and calculate the index to the next table.
+ */
+ else {
+ /* If it's invalid a new tbl8 is needed */
+ if (!tbl[entry_ind].valid) {
+ /* get a new table */
+ ret = tbl8_get(lpm, &tbl8_gindex);
+ if (ret != 0)
+ return -ENOSPC;
+
+ /* invalidate all new tbl8 entries */
+ tbl8_group_start = tbl8_gindex *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+ memset(&lpm->tbl8[tbl8_group_start], 0,
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES *
+ sizeof(struct rte_lpm6_tbl_entry));
+
+ /* init the new table's header:
+ * save the reference to the owner table
+ */
+ init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind);
+
+ /* reference to a new tbl8 */
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .lpm6_tbl8_gindex = tbl8_gindex,
+ .depth = 0,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 1,
+ };
+
+ tbl[entry_ind] = new_tbl_entry;
+
+ /* update the current table's reference counter */
+ if (tbl_ind != TBL24_IND)
+ lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
+ }
+ /*
+ * If it's valid but not extended the rule that was stored
+ * here needs to be moved to the next table.
+ */
+ else if (tbl[entry_ind].ext_entry == 0) {
+ /* get a new tbl8 index */
+ ret = tbl8_get(lpm, &tbl8_gindex);
+ if (ret != 0)
+ return -ENOSPC;
+
+ tbl8_group_start = tbl8_gindex *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+ tbl8_group_end = tbl8_group_start +
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+
+ struct rte_lpm6_tbl_entry tbl_entry = {
+ .next_hop = tbl[entry_ind].next_hop,
+ .depth = tbl[entry_ind].depth,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 0
+ };
+
+ /* Populate new tbl8 with tbl value. */
+ for (i = tbl8_group_start; i < tbl8_group_end; i++)
+ lpm->tbl8[i] = tbl_entry;
+
+ /* init the new table's header:
+ * save the reference to the owner table
+ */
+ init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind);
+
+ /*
+ * Update tbl entry to point to new tbl8 entry. Note: the
+ * ext_entry flag and the tbl8 index need to be updated
+ * simultaneously, so assign the whole structure in one go.
+ */
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .lpm6_tbl8_gindex = tbl8_gindex,
+ .depth = 0,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 1,
+ };
+
+ tbl[entry_ind] = new_tbl_entry;
+
+ /* update the current table's reference counter */
+ if (tbl_ind != TBL24_IND)
+ lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
+ }
+
+ *next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex;
+ *next_tbl = &(lpm->tbl8[*next_tbl_ind *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
+ }
+
+ return 1;
+}
+
+/*
+ * Simulate adding a route to LPM
+ *
+ * Returns:
+ * 0 on success
+ * -ENOSPC not enough tbl8 left
+ */
+static int
+simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth)
+{
+ struct rte_lpm6_tbl_entry *tbl;
+ struct rte_lpm6_tbl_entry *tbl_next = NULL;
+ int ret, i;
+
+ /* number of new tables needed for a step */
+ uint32_t need_tbl_nb;
+ /* total number of new tables needed */
+ uint32_t total_need_tbl_nb;
+
+ /* Inspect the first three bytes through tbl24 on the first step. */
+ ret = simulate_add_step(lpm, lpm->tbl24, &tbl_next, masked_ip,
+ ADD_FIRST_BYTE, 1, depth, &need_tbl_nb);
+ total_need_tbl_nb = need_tbl_nb;
+ /*
+ * Inspect one by one the rest of the bytes until
+ * the process is completed.
+ */
+ for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && ret == 1; i++) {
+ tbl = tbl_next;
+ ret = simulate_add_step(lpm, tbl, &tbl_next, masked_ip, 1,
+ (uint8_t)(i + 1), depth, &need_tbl_nb);
+ total_need_tbl_nb += need_tbl_nb;
+ }
+
+ if (tbl8_available(lpm) < total_need_tbl_nb)
+ /* not enough tbl8 to add a rule */
+ return -ENOSPC;
+
+ return 0;
+}
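+
+/*
+ * Note that rte_lpm6_add() below runs simulate_add() before touching
+ * the tables: add_step() allocates tbl8 groups as it descends, so
+ * running out of groups midway would leave a half-inserted route.
+ * The dry run guarantees enough free tbl8s exist before anything is
+ * modified.
+ */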
+
+/*
+ * Add a route
+ */
+int
+rte_lpm6_add(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ uint32_t next_hop, int is_new_rule)
+{
+ struct rte_lpm6_tbl_entry *tbl;
+ struct rte_lpm6_tbl_entry *tbl_next = NULL;
+ /* init to avoid compiler warning */
+ uint32_t tbl_next_num = 123456;
+ int status;
+ uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+ int i;
+
+ /* Check user arguments. */
+ if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+ return -EINVAL;
+
+ /* Copy the IP and mask it to avoid modifying the user's input data. */
+ ip6_copy_addr(masked_ip, ip);
+ ip6_mask_addr(masked_ip, depth);
+
+ /* Simulate adding a new route */
+ int ret = simulate_add(lpm, masked_ip, depth);
+ if (ret < 0)
+ return ret;
+
+#if 0
+ /* Add the rule to the rule table. */
+ int is_new_rule = rule_add(lpm, masked_ip, depth, next_hop);
+ /* If there is no space available for new rule return error. */
+ if (is_new_rule < 0)
+ return is_new_rule;
+#endif
+
+ /* Inspect the first three bytes through tbl24 on the first step. */
+ tbl = lpm->tbl24;
+ status = add_step(lpm, tbl, TBL24_IND, &tbl_next, &tbl_next_num,
+ masked_ip, ADD_FIRST_BYTE, 1, depth, next_hop,
+ is_new_rule);
+ assert(status >= 0);
+
+ /*
+ * Inspect one by one the rest of the bytes until
+ * the process is completed.
+ */
+ for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) {
+ tbl = tbl_next;
+ status = add_step(lpm, tbl, tbl_next_num, &tbl_next,
+ &tbl_next_num, masked_ip, 1, (uint8_t)(i + 1),
+ depth, next_hop, is_new_rule);
+ assert(status >= 0);
+ }
+
+ return status;
+}
+
+/*
+ * Takes a pointer to a table entry and inspect one level.
+ * The function returns 0 on lookup success, ENOENT if no match was found
+ * or 1 if the process needs to be continued by calling the function again.
+ */
+static inline int
+lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl,
+ const struct rte_lpm6_tbl_entry **tbl_next, const uint8_t *ip,
+ uint8_t first_byte, uint32_t *next_hop)
+{
+ uint32_t tbl8_index, tbl_entry;
+
+ /* Take the integer value from the pointer. */
+ tbl_entry = *(const uint32_t *)tbl;
+
+ /* If it is valid and extended we calculate the new pointer to return. */
+ if ((tbl_entry & RTE_LPM6_VALID_EXT_ENTRY_BITMASK) ==
+ RTE_LPM6_VALID_EXT_ENTRY_BITMASK) {
+
+ tbl8_index = ip[first_byte-1] +
+ ((tbl_entry & RTE_LPM6_TBL8_BITMASK) *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES);
+
+ *tbl_next = &lpm->tbl8[tbl8_index];
+
+ return 1;
+ } else {
+ /* If not extended then we can have a match. */
+ *next_hop = ((uint32_t)tbl_entry & RTE_LPM6_TBL8_BITMASK);
+ return (tbl_entry & RTE_LPM6_LOOKUP_SUCCESS) ? 0 : -ENOENT;
+ }
+}
+
+/*
+ * Looks up an IP
+ */
+int
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, const uint8_t *ip,
+ uint32_t *next_hop)
+{
+ const struct rte_lpm6_tbl_entry *tbl;
+ const struct rte_lpm6_tbl_entry *tbl_next = NULL;
+ int status;
+ uint8_t first_byte;
+ uint32_t tbl24_index;
+
+ /* DEBUG: Check user input arguments. */
+ if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL))
+ return -EINVAL;
+
+ first_byte = LOOKUP_FIRST_BYTE;
+ tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2];
+
+ /* Calculate pointer to the first entry to be inspected */
+ tbl = &lpm->tbl24[tbl24_index];
+
+ do {
+ /* Continue inspecting following levels until success or failure */
+ status = lookup_step(lpm, tbl, &tbl_next, ip, first_byte++, next_hop);
+ tbl = tbl_next;
+ } while (status == 1);
+
+ return status;
+}
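+
+/*
+ * A minimal usage sketch (the address is arbitrary):
+ *
+ *   uint8_t dst[RTE_LPM6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
+ *   uint32_t nh_idx;
+ *
+ *   if (rte_lpm6_lookup(lpm, dst, &nh_idx) == 0) {
+ *       // nh_idx holds the next-hop index of the best match
+ *   } // else: -EINVAL or -ENOENT, no usable route
+ */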
+
+/*
+ * Looks up a group of IP addresses
+ */
+int
+rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
+ uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
+ int32_t *next_hops, unsigned int n)
+{
+ unsigned int i;
+ const struct rte_lpm6_tbl_entry *tbl;
+ const struct rte_lpm6_tbl_entry *tbl_next = NULL;
+ uint32_t tbl24_index, next_hop;
+ uint8_t first_byte;
+ int status;
+
+ /* DEBUG: Check user input arguments. */
+ if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL))
+ return -EINVAL;
+
+ for (i = 0; i < n; i++) {
+ first_byte = LOOKUP_FIRST_BYTE;
+ tbl24_index = (ips[i][0] << BYTES2_SIZE) |
+ (ips[i][1] << BYTE_SIZE) | ips[i][2];
+
+ /* Calculate pointer to the first entry to be inspected */
+ tbl = &lpm->tbl24[tbl24_index];
+
+ do {
+ /* Continue inspecting following levels
+ * until success or failure
+ */
+ status = lookup_step(lpm, tbl, &tbl_next, ips[i],
+ first_byte++, &next_hop);
+ tbl = tbl_next;
+ } while (status == 1);
+
+ if (status < 0)
+ next_hops[i] = -1;
+ else
+ next_hops[i] = (int32_t)next_hop;
+ }
+
+ return 0;
+}
+
+struct rte_lpm6_rule *
+fill_rule6(char *buffer, const uint8_t *ip, uint8_t depth, uint32_t next_hop)
+{
+ struct rte_lpm6_rule *rule = (struct rte_lpm6_rule *)buffer;
+
+ ip6_copy_addr((uint8_t *)&rule->ip, ip);
+ rule->depth = depth;
+ rule->next_hop = next_hop;
+
+ return (rule);
+}
+
+#if 0
+/*
+ * Look for a rule in the high-level rules table
+ */
+int
+rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ uint32_t *next_hop)
+{
+ uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+
+ /* Check user arguments. */
+ if ((lpm == NULL) || next_hop == NULL || ip == NULL ||
+ (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+ return -EINVAL;
+
+ /* Copy the IP and mask it to avoid modifying the user's input data. */
+ ip6_copy_addr(masked_ip, ip);
+ ip6_mask_addr(masked_ip, depth);
+
+ return rule_find(lpm, masked_ip, depth, next_hop);
+}
+
+/*
+ * Delete a rule from the rule table.
+ * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
+ * return
+ * 0 on success
+ * <0 on failure
+ */
+static inline int
+rule_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+{
+ int ret;
+ struct rte_lpm6_rule_key rule_key;
+
+ /* init rule key */
+ rule_key_init(&rule_key, ip, depth);
+
+ /* delete the rule */
+ ret = rte_hash_del_key(lpm->rules_tbl, (void *) &rule_key);
+ if (ret >= 0)
+ lpm->used_rules--;
+
+ return ret;
+}
+
+/*
+ * Deletes a group of rules
+ *
+ * Note that the function rebuilds the lpm table,
+ * rather than doing incremental updates like
+ * the regular delete function
+ */
+int
+rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
+ uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths,
+ unsigned n)
+{
+ uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+ unsigned i;
+
+ /* Check input arguments. */
+ if ((lpm == NULL) || (ips == NULL) || (depths == NULL))
+ return -EINVAL;
+
+ for (i = 0; i < n; i++) {
+ ip6_copy_addr(masked_ip, ips[i]);
+ ip6_mask_addr(masked_ip, depths[i]);
+ rule_delete(lpm, masked_ip, depths[i]);
+ }
+
+ /*
+ * Set all the table entries to 0 (i.e. delete every rule
+ * from the data structure).
+ */
+ memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+ memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
+ * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+ tbl8_pool_init(lpm);
+
+ /*
+ * Add every rule again (except for the ones that were removed from
+ * the rules table).
+ */
+ rebuild_lpm(lpm);
+
+ return 0;
+}
+
+/*
+ * Delete all rules from the LPM table.
+ */
+void
+rte_lpm6_delete_all(struct rte_lpm6 *lpm)
+{
+ /* Zero used rules counter. */
+ lpm->used_rules = 0;
+
+ /* Zero tbl24. */
+ memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
+
+ /* Zero tbl8. */
+ memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+
+ /* init pool of free tbl8 indexes */
+ tbl8_pool_init(lpm);
+
+ /* Delete all rules from the rules table. */
+ rte_hash_reset(lpm->rules_tbl);
+}
+#endif
+
+/*
+ * Convert a depth to a one byte long mask
+ * Example: 4 will be converted to 0xF0
+ */
+static uint8_t __attribute__((pure))
+depth_to_mask_1b(uint8_t depth)
+{
+ /* To calculate the mask start with a 1 in the most significant bit and
+ * shift right; the arithmetic shift of (signed char)0x80 sign-extends,
+ * populating the left hand side with 1's (e.g. depth 4 yields 0xF0).
+ */
+ return (signed char)0x80 >> (depth - 1);
+}
+
+#if 0
+/*
+ * Find a less specific rule
+ */
+static int
+rule_find_less_specific(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+ struct rte_lpm6_rule *rule)
+{
+ int ret;
+ uint32_t next_hop;
+ uint8_t mask;
+ struct rte_lpm6_rule_key rule_key;
+
+ if (depth == 1)
+ return 0;
+
+ rule_key_init(&rule_key, ip, depth);
+
+ while (depth > 1) {
+ depth--;
+
+ /* each iteration zero one more bit of the key */
+ mask = depth & 7; /* depth % BYTE_SIZE */
+ if (mask > 0)
+ mask = depth_to_mask_1b(mask);
+
+ rule_key.depth = depth;
+ rule_key.ip[depth >> 3] &= mask;
+
+ ret = rule_find_with_key(lpm, &rule_key, &next_hop);
+ if (ret) {
+ rule->depth = depth;
+ ip6_copy_addr(rule->ip, rule_key.ip);
+ rule->next_hop = next_hop;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+#endif
+
+/*
+ * Find range of tbl8 cells occupied by a rule
+ */
+static void
+rule_find_range(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ struct rte_lpm6_tbl_entry **from,
+ struct rte_lpm6_tbl_entry **to,
+ uint32_t *out_tbl_ind)
+{
+ uint32_t ind;
+ uint32_t first_3bytes = (uint32_t)ip[0] << 16 | ip[1] << 8 | ip[2];
+
+ if (depth <= 24) {
+ /* rule is within the top level */
+ ind = first_3bytes;
+ *from = &lpm->tbl24[ind];
+ ind += (1 << (24 - depth)) - 1;
+ *to = &lpm->tbl24[ind];
+ *out_tbl_ind = TBL24_IND;
+ } else {
+ /* top level entry */
+ struct rte_lpm6_tbl_entry *tbl = &lpm->tbl24[first_3bytes];
+ assert(tbl->ext_entry == 1);
+ /* first tbl8 */
+ uint32_t tbl_ind = tbl->lpm6_tbl8_gindex;
+ tbl = &lpm->tbl8[tbl_ind *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES];
+ /* current ip byte, the top level is already behind */
+ uint8_t byte = 3;
+ /* minus top level */
+ depth -= 24;
+
+ /* iterate through levels (tbl8s)
+ * until we reach the last one
+ */
+ while (depth > 8) {
+ tbl += ip[byte];
+ assert(tbl->ext_entry == 1);
+ /* go to the next level/tbl8 */
+ tbl_ind = tbl->lpm6_tbl8_gindex;
+ tbl = &lpm->tbl8[tbl_ind *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES];
+ byte += 1;
+ depth -= 8;
+ }
+
+ /* last level/tbl8 */
+ ind = ip[byte] & depth_to_mask_1b(depth);
+ *from = &tbl[ind];
+ ind += (1 << (8 - depth)) - 1;
+ *to = &tbl[ind];
+ *out_tbl_ind = tbl_ind;
+ }
+}
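+
+/*
+ * A worked example: for a /26 rule the first three bytes select one
+ * tbl24 entry, which must already be extended; the remaining 2 bits
+ * cover ip[3] & 0xC0, so the rule occupies 1 << (8 - 2) == 64
+ * consecutive cells of that tbl8 group and *from/*to are set to the
+ * first and last of them.
+ */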
+
+/*
+ * Remove a table from the LPM tree
+ */
+static void
+remove_tbl(struct rte_lpm6 *lpm, struct rte_lpm_tbl8_hdr *tbl_hdr,
+ uint32_t tbl_ind, struct rte_lpm6_rule *lsp_rule)
+{
+ struct rte_lpm6_tbl_entry *owner_entry;
+
+ if (tbl_hdr->owner_tbl_ind == TBL24_IND)
+ owner_entry = &lpm->tbl24[tbl_hdr->owner_entry_ind];
+ else {
+ uint32_t owner_tbl_ind = tbl_hdr->owner_tbl_ind;
+ owner_entry = &lpm->tbl8[
+ owner_tbl_ind * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES +
+ tbl_hdr->owner_entry_ind];
+
+ struct rte_lpm_tbl8_hdr *owner_tbl_hdr =
+ &lpm->tbl8_hdrs[owner_tbl_ind];
+ if (--owner_tbl_hdr->ref_cnt == 0)
+ remove_tbl(lpm, owner_tbl_hdr, owner_tbl_ind, lsp_rule);
+ }
+
+ assert(owner_entry->ext_entry == 1);
+
+ /* unlink the table */
+ if (lsp_rule != NULL) {
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .next_hop = lsp_rule->next_hop,
+ .depth = lsp_rule->depth,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 0
+ };
+
+ *owner_entry = new_tbl_entry;
+ } else {
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .next_hop = 0,
+ .depth = 0,
+ .valid = INVALID,
+ .valid_group = INVALID,
+ .ext_entry = 0
+ };
+
+ *owner_entry = new_tbl_entry;
+ }
+
+ /* return the table to the pool */
+ tbl8_put(lpm, tbl_ind);
+}
+
+/*
+ * Deletes a rule
+ */
+int
+rte_lpm6_delete(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+ struct rte_lpm6_rule *lsp_rule)
+{
+ uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+ //struct rte_lpm6_rule lsp_rule_obj;
+ //struct rte_lpm6_rule *lsp_rule;
+ //int ret;
+ uint32_t tbl_ind;
+ struct rte_lpm6_tbl_entry *from, *to;
+
+ /* Check input arguments. */
+ if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+ return -EINVAL;
+
+ /* Copy the IP and mask it to avoid modifying the user's input data. */
+ ip6_copy_addr(masked_ip, ip);
+ ip6_mask_addr(masked_ip, depth);
+
+#if 0
+ /* Delete the rule from the rule table. */
+ ret = rule_delete(lpm, masked_ip, depth);
+ if (ret < 0)
+ return -ENOENT;
+#endif
+
+ /* find rule cells */
+ rule_find_range(lpm, masked_ip, depth, &from, &to, &tbl_ind);
+
+#if 0
+ /* find a less specific rule (a rule with smaller depth)
+ * note: masked_ip will be modified, don't use it anymore
+ */
+ ret = rule_find_less_specific(lpm, masked_ip, depth,
+ &lsp_rule_obj);
+ lsp_rule = ret ? &lsp_rule_obj : NULL;
+#endif
+ /* decrement the table rule counter,
+ * note that tbl24 doesn't have a header
+ */
+ if (tbl_ind != TBL24_IND) {
+ struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind];
+ if (--tbl_hdr->ref_cnt == 0) {
+ /* remove the table */
+ remove_tbl(lpm, tbl_hdr, tbl_ind, lsp_rule);
+ return 0;
+ }
+ }
+
+ /* iterate rule cells */
+ for (; from <= to; from++)
+ if (from->ext_entry == 1) {
+ /* reference to a more specific space
+ * of the prefix/rule. Entries in a more
+ * specific space that are not used by
+ * a more specific prefix must be occupied
+ * by the prefix
+ */
+ if (lsp_rule != NULL)
+ expand_rule(lpm,
+ from->lpm6_tbl8_gindex *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES,
+ depth, lsp_rule->depth,
+ lsp_rule->next_hop, VALID);
+ else
+ /* since the prefix has no less specific prefix,
+ * its more specific space must be invalidated
+ */
+ expand_rule(lpm,
+ from->lpm6_tbl8_gindex *
+ RTE_LPM6_TBL8_GROUP_NUM_ENTRIES,
+ depth, 0, 0, INVALID);
+ } else if (from->depth == depth) {
+ /* entry is not a reference and belongs to the prefix */
+ if (lsp_rule != NULL) {
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .next_hop = lsp_rule->next_hop,
+ .depth = lsp_rule->depth,
+ .valid = VALID,
+ .valid_group = VALID,
+ .ext_entry = 0
+ };
+
+ *from = new_tbl_entry;
+ } else {
+ struct rte_lpm6_tbl_entry new_tbl_entry = {
+ .next_hop = 0,
+ .depth = 0,
+ .valid = INVALID,
+ .valid_group = INVALID,
+ .ext_entry = 0
+ };
+
+ *from = new_tbl_entry;
+ }
+ }
+
+ return 0;
+}
Index: sys/contrib/dpdk_rte_lpm/rte_shim.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_shim.h
@@ -0,0 +1,31 @@
+#ifndef _RTE_SHIM_H_
+#define _RTE_SHIM_H_
+
+#define rte_malloc(_type, _size, _align) malloc(_size, M_TEMP, M_NOWAIT)
+#define rte_free(_ptr) free(_ptr, M_TEMP)
+#define rte_zmalloc(_type, _size, _align) malloc(_size, M_TEMP, M_NOWAIT | M_ZERO)
+#define rte_zmalloc_socket(_type, _size, _align, _s) malloc(_size, M_TEMP, M_NOWAIT | M_ZERO)
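+
+/*
+ * All of the allocation shims above map to malloc(9) with M_NOWAIT, so,
+ * as with the DPDK originals, every caller must handle a NULL return;
+ * the _type, _align and _s (NUMA socket) arguments are accepted but
+ * ignored.
+ */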
+
+#define rte_mcfg_tailq_write_unlock()
+#define rte_mcfg_tailq_write_lock()
+
+#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
+#define strtoull strtoul
+#define assert(_s) KASSERT((_s), ("DPDK: assert failed"))
+#define rte_memcpy memcpy
+#define rte_strerror(_err) "strerror_not_implemented"
+#define RTE_LOG(_sev, _sub, _fmt, ...) printf("DPDK::" #_sev "::" #_sub " %s: " _fmt, __func__ , ## __VA_ARGS__)
+
+#include "sys/endian.h"
+#define RTE_BYTE_ORDER BYTE_ORDER
+#define RTE_LITTLE_ENDIAN LITTLE_ENDIAN
+#define RTE_BIG_ENDIAN BIG_ENDIAN
+
+#include "sys/limits.h" // CHAR_BIT
+#define rte_le_to_cpu_32 le32toh
+
+#include "rte_jhash.h"
+#include "rte_common.h"
+
+
+#endif
Index: sys/contrib/dpdk_rte_lpm/rte_tailq.h
===================================================================
--- /dev/null
+++ sys/contrib/dpdk_rte_lpm/rte_tailq.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _RTE_TAILQ_H_
+#define _RTE_TAILQ_H_
+
+/**
+ * @file
+ * Defines rte_tailq APIs for internal use only.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+//#include <stdio.h>
+#include <netinet6/rte_debug.h>
+
+/** dummy structure type used by the rte_tailq APIs */
+struct rte_tailq_entry {
+ TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */
+ void *data; /**< Pointer to the data referenced by this tailq entry */
+};
+/** dummy */
+TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry);
+
+#define RTE_TAILQ_NAMESIZE 32
+
+/**
+ * The structure defining a tailq header entry for storing
+ * in the rte_config structure in shared memory. Each tailq
+ * is identified by name.
+ * Any library storing a set of objects e.g. rings, mempools, hash-tables,
+ * is recommended to use an entry here, so as to make it easy for
+ * a multi-process app to find already-created elements in shared memory.
+ */
+struct rte_tailq_head {
+ struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */
+ char name[RTE_TAILQ_NAMESIZE];
+};
+
+struct rte_tailq_elem {
+ /**
+ * Reference to head in shared mem, updated at init time by
+ * rte_eal_tailqs_init()
+ */
+ struct rte_tailq_head *head;
+ TAILQ_ENTRY(rte_tailq_elem) next;
+ const char name[RTE_TAILQ_NAMESIZE];
+};
+
+/**
+ * Return the first tailq entry cast to the right struct.
+ */
+#define RTE_TAILQ_CAST(tailq_entry, struct_name) \
+ (struct struct_name *)&(tailq_entry)->tailq_head
+
+/**
+ * Utility macro to make looking up a tail queue for a particular struct easier.
+ *
+ * @param name
+ * The name of tailq
+ *
+ * @param struct_name
+ * The name of the list type we are using. (Generally this is the same as the
+ * first parameter passed to TAILQ_HEAD macro)
+ *
+ * @return
+ * The return value from rte_eal_tailq_lookup, typecast to the appropriate
+ * structure pointer type.
+ * NULL on error, since the tailq_head is the first
+ * element in the rte_tailq_head structure.
+ */
+#define RTE_TAILQ_LOOKUP(name, struct_name) \
+ RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name)
+
+/**
+ * Dump tail queues to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+//void rte_dump_tailq(FILE *f);
+
+/**
+ * Lookup for a tail queue.
+ *
+ * Get a pointer to a tail queue header of a tail
+ * queue identified by the name given as an argument.
+ * Note: this function is not multi-thread safe, and should only be called from
+ * a single thread at a time
+ *
+ * @param name
+ * The name of the queue.
+ * @return
+ * A pointer to the tail queue head structure.
+ */
+struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
+
+/**
+ * Register a tail queue.
+ *
+ * Register a tail queue from shared memory.
+ * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to
+ * register tailq from the different dpdk libraries. Since this macro is a
+ * constructor, the function has no access to dpdk shared memory, so the
+ * registered tailq can not be used before call to rte_eal_init() which calls
+ * rte_eal_tailqs_init().
+ *
+ * @param t
+ * The tailq element which contains the name of the tailq you want to
+ * create (/retrieve when in secondary process).
+ * @return
+ * 0 on success or -1 in case of an error.
+ */
+int rte_eal_tailq_register(struct rte_tailq_elem *t);
+
+#define EAL_REGISTER_TAILQ(t) \
+RTE_INIT(tailqinitfn_ ##t) \
+{ \
+ if (rte_eal_tailq_register(&t) < 0) \
+ rte_panic("Cannot initialize tailq: %s\n", t.name); \
+}
+
+/* This macro permits both remove and free var within the loop safely.*/
+#ifndef TAILQ_FOREACH_SAFE
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TAILQ_H_ */
Index: sys/modules/dpdk_lpm4/Makefile
===================================================================
--- /dev/null
+++ sys/modules/dpdk_lpm4/Makefile
@@ -0,0 +1,12 @@
+# $FreeBSD$
+
+SYSDIR?=${SRCTOP}/sys
+.include "${SYSDIR}/conf/kern.opts.mk"
+
+.PATH: ${SYSDIR}/contrib/dpdk_rte_lpm
+
+KMOD= dpdk_lpm4
+SRCS= opt_inet.h
+SRCS.INET=dpdk_lpm.c rte_lpm.c
+
+.include <bsd.kmod.mk>
Index: sys/modules/dpdk_lpm6/Makefile
===================================================================
--- /dev/null
+++ sys/modules/dpdk_lpm6/Makefile
@@ -0,0 +1,12 @@
+# $FreeBSD$
+
+SYSDIR?=${SRCTOP}/sys
+.include "${SYSDIR}/conf/kern.opts.mk"
+
+.PATH: ${SYSDIR}/contrib/dpdk_rte_lpm
+
+KMOD= dpdk_lpm6
+SRCS= opt_inet6.h
+SRCS.INET6=dpdk_lpm6.c rte_lpm6.c
+
+.include <bsd.kmod.mk>
Index: sys/net/route.h
===================================================================
--- sys/net/route.h
+++ sys/net/route.h
@@ -230,6 +230,7 @@
/* Control plane route request flags */
#define NHR_COPY 0x100 /* Copy rte data */
+#define NHR_UNLOCKED 0x200 /* Do not lock table */
/*
* Routing statistics.
@@ -454,6 +455,8 @@
/* New API */
struct nhop_object *rib_lookup(uint32_t fibnum, const struct sockaddr *dst,
uint32_t flags, uint32_t flowid);
+struct rib_rtable_info;
+bool rib_get_rtable_info(uint32_t fibnum, int family, struct rib_rtable_info *info);
#endif
#endif
Index: sys/net/route.c
===================================================================
--- sys/net/route.c
+++ sys/net/route.c
@@ -155,6 +155,12 @@
rt_table_destroy(struct rib_head *rh)
{
+ RIB_WLOCK(rh);
+ rh->rib_dying = true;
+ RIB_WUNLOCK(rh);
+
+ fib_destroy_rib(rh);
+
tmproutes_destroy(rh);
rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
Index: sys/net/route/nhgrp_ctl.c
===================================================================
--- sys/net/route/nhgrp_ctl.c
+++ sys/net/route/nhgrp_ctl.c
@@ -293,6 +293,17 @@
return (nhg_priv);
}
+void
+nhgrp_ref_object(struct nhgrp_object *nhg)
+{
+ struct nhgrp_priv *nhg_priv;
+ u_int old;
+
+ nhg_priv = NHGRP_PRIV(nhg);
+ old = refcount_acquire(&nhg_priv->nhg_refcount);
+ KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
+}
+
void
nhgrp_free(struct nhgrp_object *nhg)
{
@@ -753,6 +764,21 @@
return (error);
}
+uint32_t
+nhgrp_get_count(struct rib_head *rh)
+{
+ struct nh_control *ctl;
+ uint32_t count;
+
+ ctl = rh->nh_control;
+
+ NHOPS_RLOCK(ctl);
+ count = ctl->gr_head.items_count;
+ NHOPS_RUNLOCK(ctl);
+
+ return (count);
+}
+
uint32_t
nhgrp_get_idx(const struct nhgrp_object *nhg)
{
Index: sys/net/route/nhop_ctl.c
===================================================================
--- sys/net/route/nhop_ctl.c
+++ sys/net/route/nhop_ctl.c
@@ -690,6 +690,19 @@
&nh_priv->nh_epoch_ctx);
}
+void
+nhop_ref_any(struct nhop_object *nh)
+{
+#ifdef ROUTE_MPATH
+ if (!NH_IS_NHGRP(nh))
+ nhop_ref_object(nh);
+ else
+ nhgrp_ref_object((struct nhgrp_object *)nh);
+#else
+ nhop_ref_object(nh);
+#endif
+}
+
void
nhop_free_any(struct nhop_object *nh)
{
@@ -852,6 +865,21 @@
return (error);
}
+uint32_t
+nhops_get_count(struct rib_head *rh)
+{
+ struct nh_control *ctl;
+ uint32_t count;
+
+ ctl = rh->nh_control;
+
+ NHOPS_RLOCK(ctl);
+ count = ctl->nh_head.items_count;
+ NHOPS_RUNLOCK(ctl);
+
+ return (count);
+}
+
int
nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
Index: sys/net/route/route_algo.h
===================================================================
--- /dev/null
+++ sys/net/route/route_algo.h
@@ -0,0 +1,110 @@
+/*-
+ * Copyright (c) 2020
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+
+struct fib_data;
+struct fib_dp;
+enum flm_op_result {
+ FLM_SUCCESS, /* No errors, operation successful */
+ FLM_REBUILD, /* Operation cannot be completed, schedule algorithm rebuild */
+ FLM_ERROR, /* Operation failed, this algo cannot be used */
+};
+
+struct rib_rtable_info {
+ uint32_t num_prefixes;
+ uint32_t num_nhops;
+ uint32_t num_nhgrp;
+};
+
+struct flm_lookup_key {
+ union {
+ const struct in6_addr *addr6;
+ struct in_addr addr4;
+ };
+};
+
+typedef struct nhop_object *flm_lookup_t(void *algo_data,
+ const struct flm_lookup_key key, uint32_t scopeid);
+typedef enum flm_op_result flm_init_t (uint32_t fibnum, struct fib_data *fd,
+ void *_old_data, void **new_data);
+typedef void flm_destroy_t(void *data);
+typedef enum flm_op_result flm_dump_t(struct rtentry *rt, void *data);
+typedef enum flm_op_result flm_dump_end_t(void *data, struct fib_dp *dp);
+typedef enum flm_op_result flm_change_t(struct rib_head *rnh,
+ struct rib_cmd_info *rc, void *data);
+typedef uint8_t flm_get_pref_t(const struct rib_rtable_info *rinfo);
+
+#define FIB_M_NEED_NHOPS 0x01 /* need nexthop index map allocation */
+#define FIB_M_NO_CALLOUT 0x02 /* does not need callouts */
+
+struct fib_lookup_module {
+ char *flm_name; /* algo name */
+ int flm_family; /* address family this module supports */
+ int flm_refcount; /* # of references */
+ uint32_t flm_flags; /* flags */
+ flm_init_t *flm_init_cb; /* instance init */
+ flm_destroy_t *flm_destroy_cb; /* destroy instance */
+ flm_change_t *flm_change_rib_item_cb;/* routing table change hook */
+ flm_dump_t *flm_dump_rib_item_cb; /* routing table dump cb */
+ flm_dump_end_t *flm_dump_end_cb; /* end of dump */
+ flm_lookup_t *flm_lookup; /* lookup function */
+ flm_get_pref_t *flm_get_pref; /* get algo preference */
+ TAILQ_ENTRY(fib_lookup_module) entries;
+};
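+
+/*
+ * Example (illustrative sketch only; the "example" names are
+ * hypothetical): a module provides its callbacks via a static instance
+ * of this structure and registers it:
+ *
+ *	struct fib_lookup_module flm_example = {
+ *		.flm_name = "example",
+ *		.flm_family = AF_INET,
+ *		.flm_init_cb = example_init,
+ *		.flm_destroy_cb = example_destroy,
+ *		.flm_dump_rib_item_cb = example_add_route_cb,
+ *		.flm_dump_end_cb = example_end_dump,
+ *		.flm_change_rib_item_cb = example_change_cb,
+ *		.flm_get_pref = example_get_pref,
+ *	};
+ *	fib_module_register(&flm_example);
+ *
+ * See flm_radix4 in sys/netinet/in_fib_algo.c for a complete instance.
+ */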
+
+/* Datapath lookup data */
+struct fib_dp {
+ flm_lookup_t *f;
+ void *arg;
+};
+
+VNET_DECLARE(struct fib_dp *, inet_dp);
+#define V_inet_dp VNET(inet_dp)
+VNET_DECLARE(struct fib_dp *, inet6_dp);
+#define V_inet6_dp VNET(inet6_dp)
+
+int fib_module_init(struct fib_lookup_module *flm, uint32_t fibnum,
+ int family);
+int fib_module_clone(const struct fib_lookup_module *flm_orig,
+ struct fib_lookup_module *flm, bool waitok);
+int fib_module_dumptree(struct fib_lookup_module *flm,
+ enum rib_subscription_type subscription_type);
+int fib_module_register(struct fib_lookup_module *flm);
+int fib_module_unregister(struct fib_lookup_module *flm);
+
+uint32_t fib_get_nhop_idx(struct fib_data *fd, struct nhop_object *nh);
+void fib_free_nhop_idx(struct fib_data *fd, uint32_t idx);
+void fib_free_nhop(struct fib_data *fd, struct nhop_object *nh);
+struct nhop_object **fib_get_nhop_array(struct fib_data *fd);
+void fib_get_rtable_info(struct rib_head *rh, struct rib_rtable_info *rinfo);
+struct rib_head *fib_get_rh(struct fib_data *fd);
+
+#endif /* _NET_ROUTE_ROUTE_ALGO_H_ */
Index: sys/net/route/route_algo.c
===================================================================
--- /dev/null
+++ sys/net/route/route_algo.c
@@ -0,0 +1,1198 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#define RTDEBUG
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/sbuf.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <net/vnet.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
+
+/*
+ * Route lookup framework.
+ *
+ * flm - fib lookup modules - kernel modules implementing particular algo
+ * fd - fib data - instance of an flm bound to specific routing table
+ *
+ * For each supported address family, there is an allocated array of fib_dp
+ * structures, indexed by fib number. Each array entry contains a callback
+ * function and its argument. This function will be called with a
+ * family-specific lookup key, scope and the provided argument. The array
+ * gets re-created every time a new algo instance is created. Please take a
+ * look at the replace_rtables_family() function for more details.
+ *
+ * The control plane is responsible for setting up and updating the
+ * necessary dataplane structures:
+ * 1) nexthop abstraction -> the module has to deal with indexes,
+ *    refcounting, nexthop groups etc
+ * 2) sync with route tables
+ * 3) dataplane attachment points
+ * 4) fail early. Some algorithms are immutable, so any change leads to a
+ *    rebuild. Some are mutable to some extent, so the module is built over
+ *    common setup/teardown instances, making error handling easier.
+ * 5) preference.
+ */
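+
+/*
+ * Datapath usage sketch (illustrative; fib4_lookup() in
+ * sys/netinet/in_fib.c below is the actual consumer):
+ *
+ *	struct fib_dp *dp = &V_inet_dp[fibnum];
+ *	struct flm_lookup_key key = { .addr4 = dst };
+ *
+ *	nh = dp->f(dp->arg, key, scopeid);
+ */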
+
+SYSCTL_DECL(_net_route);
+SYSCTL_NODE(_net_route, OID_AUTO, algo, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "Route algorithm lookups");
+
+SYSCTL_NODE(_net_route_algo, OID_AUTO, inet6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "IPv6 algorithm lookups");
+SYSCTL_NODE(_net_route_algo, OID_AUTO, inet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "IPv4 algorithm lookups");
+
+struct nhop_ref_table {
+ uint32_t count;
+ int32_t refcnt[0];
+};
+
+struct fib_data {
+ uint32_t number_nhops; /* current # of nhops */
+ uint32_t number_records; /* current # of routes */
+ uint8_t hit_nhops; /* true if out of nhop limit */
+ uint8_t init_done; /* true if init is completed */
+ uint32_t fd_dead:1; /* Scheduled for deletion */
+ uint32_t fd_linked:1; /* true if linked */
+ uint32_t fd_need_rebuild:1; /* true if rebuild scheduled */
+ uint32_t fd_force_eval:1; /* true if immediate algo re-evaluation is scheduled */
+ uint8_t fd_family; /* family */
+ uint32_t fd_fibnum; /* fibnum */
+ uint32_t fd_failed_rebuilds; /* stat: failed rebuilds */
+ struct callout fd_callout; /* rebuild callout */
+ void *fd_algo_data; /* algorithm data */
+ struct nhop_object **nh_idx; /* nhop idx->ptr array */
+ struct nhop_ref_table *nh_ref_table; /* array with # of nhop references */
+ struct rib_head *fd_rh; /* RIB table we're attached to */
+ struct rib_subscription *fd_rs; /* storing table subscription */
+ struct fib_algo_calldata *fa;
+ struct fib_dp fd_dp; /* fib datapath data */
+ struct vnet *fd_vnet; /* vnet nhop belongs to */
+ struct epoch_context fd_epoch_ctx;
+ uint64_t gencnt;
+ struct fib_lookup_module *fd_flm;
+ uint32_t fd_num_changes; /* number of changes since last callout */
+ TAILQ_ENTRY(fib_data) entries; /* list of all fds in vnet */
+};
+
+static void rebuild_callout(void *_data);
+static void destroy_instance_epoch(epoch_context_t ctx);
+static enum flm_op_result switch_algo(struct fib_data *fd);
+static struct fib_lookup_module *find_algo(const char *algo_name, int family);
+
+static struct fib_lookup_module *fib_check_best_algo(struct rib_head *rh,
+ struct fib_lookup_module *orig_flm);
+
+struct mtx fib_mtx;
+#define MOD_LOCK() mtx_lock(&fib_mtx)
+#define MOD_UNLOCK() mtx_unlock(&fib_mtx)
+
+
+/* Algorithm has to be this percent better than the current to switch */
+#define BEST_DIFF_PERCENT (5 * 256 / 100)
+/* Schedule algo re-evaluation X seconds after a change */
+#define ALGO_EVAL_DELAY_MS 30000
+/* Force algo re-evaluation after X changes */
+#define ALGO_EVAL_NUM_ROUTES 100
+/* Try to setup algorithm X times */
+#define FIB_MAX_TRIES 32
+/* Max amount of supported nexthops */
+#define FIB_MAX_NHOPS 262144
+#define FIB_CALLOUT_DELAY_MS 50
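+
+/*
+ * Note: flm_get_pref() returns a value in the 0..255 range, so
+ * BEST_DIFF_PERCENT works out to 5 * 256 / 100 = 12 units. For example,
+ * a candidate with preference 200 only displaces a current algo whose
+ * preference is below 188.
+ */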
+
+
+/* TODO: per-VNET */
+static TAILQ_HEAD(fib_data_head, fib_data) fib_data_list = TAILQ_HEAD_INITIALIZER(fib_data_list);
+
+struct fib_dp_header {
+ struct epoch_context ffi_epoch_ctx;
+ uint32_t ffi_num_tables;
+ struct fib_dp ffi_idx[0];
+};
+
+static TAILQ_HEAD(, fib_lookup_module) all_algo_list;
+
+#ifdef RTDEBUG
+#define RH_PRINTF(_rh, _fmt, ...) printf("[rt_algo] %s.%u %s: " _fmt "\n", \
+ print_family(_rh->rib_family), _rh->rib_fibnum, __func__ , ## __VA_ARGS__)
+#define RH_PRINTF_RAW(_fmt, ...) printf("[rt_algo] %s: " _fmt "\n", __func__ , ## __VA_ARGS__)
+#define FD_PRINTF(fd, _fmt, ...) printf("[rt_algo] %s.%u (%s) %s: " _fmt "\n",\
+ print_family(fd->fd_family), fd->fd_fibnum, fd->fd_flm->flm_name, __func__, \
+ ##__VA_ARGS__)
+#else
+#define RH_PRINTF(_rh, _fmt, ...)
+#define RH_PRINTF_RAW(_fmt, ...)
+#define FD_PRINTF(fd, _fmt, ...)
+#endif
+
+static const char *
+print_family(int family)
+{
+ if (family == AF_INET)
+ return ("inet");
+ else if (family == AF_INET6)
+ return ("inet6");
+ else
+ return ("unknown");
+}
+
+static int
+print_algos(struct sysctl_req *req, int family)
+{
+ struct fib_lookup_module *flm;
+ struct sbuf sbuf;
+ int error, count = 0;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error == 0) {
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ TAILQ_FOREACH(flm, &all_algo_list, entries) {
+ if (flm->flm_family == family) {
+ if (count++ > 0)
+ sbuf_cat(&sbuf, ", ");
+ sbuf_cat(&sbuf, flm->flm_name);
+ }
+ }
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ }
+ return (error);
+}
+
+static int
+print_algos_inet6(SYSCTL_HANDLER_ARGS)
+{
+
+ return (print_algos(req, AF_INET6));
+}
+SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo_list,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ print_algos_inet6, "A", "List of algos");
+
+static int
+print_algos_inet(SYSCTL_HANDLER_ARGS)
+{
+
+ return (print_algos(req, AF_INET));
+}
+SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo_list,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ print_algos_inet, "A", "List of algos");
+
+
+static struct fib_lookup_module *
+find_algo(const char *algo_name, int family)
+{
+ struct fib_lookup_module *flm;
+
+ TAILQ_FOREACH(flm, &all_algo_list, entries) {
+ if ((strcmp(flm->flm_name, algo_name) == 0) &&
+ (family == flm->flm_family))
+ return (flm);
+ }
+
+ return (NULL);
+}
+
+static uint32_t
+callout_calc_delay(struct fib_data *fd)
+{
+ uint32_t shift;
+
+ if (fd->fd_failed_rebuilds > 10)
+ shift = 10;
+ else
+ shift = fd->fd_failed_rebuilds;
+
+ return ((1 << shift) * FIB_CALLOUT_DELAY_MS);
+}
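+
+/*
+ * Worked example for the backoff above: with no failed rebuilds the
+ * callout fires 1 * 50ms out; after one failure 2 * 50ms = 100ms, after
+ * four failures 16 * 50ms = 800ms, and from ten failures on the delay
+ * is capped at 1024 * 50ms (~51s).
+ */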
+
+static void
+schedule_callout(struct fib_data *fd, int delay_ms)
+{
+
+ callout_reset_sbt(&fd->fd_callout, 0, SBT_1MS * delay_ms,
+ rebuild_callout, fd, 0);
+}
+
+static void
+schedule_algo_eval(struct fib_data *fd)
+{
+
+ if (fd->fd_num_changes++ == 0) {
+ /* Start callout to consider switch */
+ MOD_LOCK();
+ if (!callout_pending(&fd->fd_callout))
+ schedule_callout(fd, ALGO_EVAL_DELAY_MS);
+ MOD_UNLOCK();
+ } else if (fd->fd_num_changes > ALGO_EVAL_NUM_ROUTES && !fd->fd_force_eval) {
+ /* Reset callout to exec immediately */
+ MOD_LOCK();
+ if (!fd->fd_need_rebuild) {
+ fd->fd_force_eval = true;
+ schedule_callout(fd, 1);
+ }
+ MOD_UNLOCK();
+ }
+}
+
+/*
+ * rib subscription handler
+ */
+static void
+handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+ struct fib_data *fd = (struct fib_data *)_data;
+ enum flm_op_result result;
+
+ RIB_WLOCK_ASSERT(rnh);
+
+ if (!fd->init_done)
+ return;
+
+ schedule_algo_eval(fd);
+
+ result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data);
+
+ switch (result) {
+ case FLM_SUCCESS:
+ break;
+ case FLM_REBUILD:
+ /*
+ * Algo reported inability to handle,
+ * schedule algo rebuild.
+ */
+ MOD_LOCK();
+ if (!fd->fd_need_rebuild) {
+ fd->fd_need_rebuild = true;
+ /*
+ * Potentially rewrites pending callout
+ * to re-evaluate algo.
+ */
+ FD_PRINTF(fd, "Scheduling rebuild");
+ schedule_callout(fd, callout_calc_delay(fd));
+ }
+ MOD_UNLOCK();
+ break;
+ default:
+ /*
+ * Algo reported a non-recoverable error.
+ * Remove and switch to radix?
+ */
+ FD_PRINTF(fd, "algo reported non-recoverable error");
+ // TODO: switch to radix
+ }
+}
+
+static void
+estimate_scale(const struct fib_data *old_fd, struct fib_data *fd)
+{
+
+ if (old_fd == NULL) {
+ fd->number_nhops = 16;
+ return;
+ }
+
+ if (old_fd->hit_nhops && old_fd->number_nhops < FIB_MAX_NHOPS)
+ fd->number_nhops = 2 * old_fd->number_nhops;
+ else
+ fd->number_nhops = old_fd->number_nhops;
+}
+
+struct walk_cbdata {
+ struct fib_data *fd;
+ flm_dump_t *func;
+ enum flm_op_result result;
+};
+
+static void
+sync_algo_end_cb(struct rib_head *rnh, enum rib_walk_hook stage, void *_data)
+{
+ struct walk_cbdata *w = (struct walk_cbdata *)_data;
+ struct fib_data *fd = w->fd;
+
+ if (rnh->rib_dying) {
+ w->result = FLM_ERROR;
+ return;
+ }
+
+ if (stage != RIB_WALK_HOOK_POST || w->result != FLM_SUCCESS)
+ return;
+
+ if (fd->hit_nhops) {
+ FD_PRINTF(fd, "ran out of nexthops at %u nhops",
+ fd->nh_ref_table->count);
+ w->result = FLM_REBUILD;
+ return;
+ }
+
+ w->result = fd->fd_flm->flm_dump_end_cb(fd->fd_algo_data, &fd->fd_dp);
+
+ if (w->result == FLM_SUCCESS) {
+ /* Mark init as done to allow routing updates */
+ fd->init_done = 1;
+ }
+}
+
+static int
+sync_algo_cb(struct rtentry *rt, void *_data)
+{
+ struct walk_cbdata *w = (struct walk_cbdata *)_data;
+ enum flm_op_result result;
+
+ if (w->result == FLM_SUCCESS && w->func) {
+ result = w->func(rt, w->fd->fd_algo_data);
+ if (result != FLM_SUCCESS)
+ w->result = result;
+ }
+
+ return (0);
+}
+
+static enum flm_op_result
+sync_algo(struct fib_data *fd)
+{
+ struct walk_cbdata w;
+
+ w.fd = fd;
+ w.func = fd->fd_flm->flm_dump_rib_item_cb;
+ w.result = FLM_SUCCESS;
+
+ rib_walk_ext_internal(fd->fd_rh, true, sync_algo_cb, sync_algo_end_cb, &w);
+
+ FD_PRINTF(fd, "initial dump completed.");
+
+ return (w.result);
+}
+
+/*
+ * Assume already unlinked from datapath
+ */
+static int
+schedule_destroy_instance(struct fib_data *fd, bool in_callout)
+{
+ bool is_dead;
+
+ NET_EPOCH_ASSERT();
+
+ MOD_LOCK();
+ is_dead = fd->fd_dead;
+ if (!is_dead)
+ fd->fd_dead = true;
+ if (fd->fd_linked) {
+ TAILQ_REMOVE(&fib_data_list, fd, entries);
+ fd->fd_linked = false;
+ }
+ MOD_UNLOCK();
+ if (is_dead)
+ return (0);
+
+ FD_PRINTF(fd, "DETACH");
+
+ if (fd->fd_rs != NULL)
+ rib_unsibscribe(fd->fd_rs);
+
+ /*
+ * After rib_unsubscribe() no _new_ handle_rtable_change_cb() calls
+ * will be executed, hence no _new_ callout schedules will happen.
+ *
+ * There can be two possible scenarios here:
+ * 1) we're running inside a callout when we're deleting ourselves
+ * due to migration to a newer fd
+ * 2) we're running from rt_table_destroy() and callout is scheduled
+ * for execution OR is executing
+ *
+ * For (2) we need to wait for the callout termination, as the routing table
+ * will be destroyed after this function returns.
+ * For (1) we cannot call drain, but can ensure that this is the last invocation.
+ */
+
+ if (in_callout)
+ callout_stop(&fd->fd_callout);
+ else
+ callout_drain(&fd->fd_callout);
+
+ /*
+ * At this moment no other pending work is scheduled.
+ */
+ FD_PRINTF(fd, "destroying old instance");
+ epoch_call(net_epoch_preempt, destroy_instance_epoch,
+ &fd->fd_epoch_ctx);
+
+ return (0);
+}
+
+void
+fib_destroy_rib(struct rib_head *rh)
+{
+ struct fib_data_head tmp_head = TAILQ_HEAD_INITIALIZER(tmp_head);
+ struct fib_data *fd, *fd_tmp;
+
+ /*
+ * At this point the rib_dying flag has been set on rnh, so all new fds
+ * will fail at the sync_algo() stage and nothing new will be added to
+ * the list.
+ */
+ MOD_LOCK();
+ TAILQ_FOREACH_SAFE(fd, &fib_data_list, entries, fd_tmp) {
+ if (fd->fd_rh == rh) {
+ TAILQ_REMOVE(&fib_data_list, fd, entries);
+ fd->fd_linked = false;
+ TAILQ_INSERT_TAIL(&tmp_head, fd, entries);
+ }
+ }
+ MOD_UNLOCK();
+
+ /* Pass 2: remove each entry */
+ TAILQ_FOREACH_SAFE(fd, &tmp_head, entries, fd_tmp) {
+ schedule_destroy_instance(fd, false);
+ }
+}
+
+static void
+destroy_instance(struct fib_data *fd)
+{
+
+ FD_PRINTF(fd, "destroy fd %p", fd);
+
+ /* Call destroy callback first */
+ if (fd->fd_algo_data != NULL)
+ fd->fd_flm->flm_destroy_cb(fd->fd_algo_data);
+
+ /* Nhop table */
+ if (fd->nh_idx != NULL) {
+ for (int i = 0; i < fd->number_nhops; i++) {
+ if (fd->nh_idx[i] != NULL) {
+ FD_PRINTF(fd, " FREE nhop %d %p", i, fd->nh_idx[i]);
+ nhop_free_any(fd->nh_idx[i]);
+ }
+ }
+ free(fd->nh_idx, M_RTABLE);
+ }
+ if (fd->nh_ref_table != NULL)
+ free(fd->nh_ref_table, M_RTABLE);
+
+ MOD_LOCK();
+ fd->fd_flm->flm_refcount--;
+ MOD_UNLOCK();
+
+ free(fd, M_RTABLE);
+}
+
+/*
+ * Epoch callback indicating fd is safe to destroy
+ */
+static void
+destroy_instance_epoch(epoch_context_t ctx)
+{
+ struct fib_data *fd;
+
+ fd = __containerof(ctx, struct fib_data, fd_epoch_ctx);
+
+ destroy_instance(fd);
+}
+
+static enum flm_op_result
+try_setup_instance(struct fib_lookup_module *flm, struct rib_head *rh,
+ struct fib_data *old_fd, struct fib_data **pfd)
+{
+ struct fib_data *fd;
+ size_t size;
+ enum flm_op_result result;
+
+ /* Allocate */
+ fd = malloc(sizeof(struct fib_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (fd == NULL) {
+ *pfd = NULL;
+ return (FLM_REBUILD);
+ }
+ *pfd = fd;
+
+ estimate_scale(old_fd, fd);
+
+ fd->fd_rh = rh;
+ fd->fd_family = rh->rib_family;
+ fd->fd_fibnum = rh->rib_fibnum;
+ callout_init(&fd->fd_callout, 1);
+ fd->fd_vnet = curvnet;
+ fd->fd_flm = flm;
+
+ /* Allocate nhidx -> nhop_ptr table */
+ size = fd->number_nhops * sizeof(void *);
+ //FD_PRINTF(fd, "malloc(%lu)", size);
+ fd->nh_idx = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO);
+ if (fd->nh_idx == NULL) {
+ FD_PRINTF(fd, "Unable to allocate nhop table idx (sz:%zu)", size);
+ return (FLM_REBUILD);
+ }
+
+ /* Allocate nhop index refcount table */
+ size = sizeof(struct nhop_ref_table);
+ size += fd->number_nhops * sizeof(uint32_t);
+ //FD_PRINTF(fd, "malloc(%lu)", size);
+ fd->nh_ref_table = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO);
+ if (fd->nh_ref_table == NULL) {
+ FD_PRINTF(fd, "Unable to allocate nhop refcount table (sz:%zu)", size);
+ return (FLM_REBUILD);
+ }
+
+ /* Okay, we're ready for algo init */
+ void *old_algo_data = (old_fd != NULL) ? old_fd->fd_algo_data : NULL;
+ result = flm->flm_init_cb(fd->fd_fibnum, fd, old_algo_data, &fd->fd_algo_data);
+ if (result != FLM_SUCCESS)
+ return (result);
+
+ /* Try to subscribe */
+ if (flm->flm_change_rib_item_cb != NULL) {
+ fd->fd_rs = rib_subscribe_internal(fd->fd_rh,
+ handle_rtable_change_cb, fd, RIB_NOTIFY_IMMEDIATE, 0);
+ if (fd->fd_rs == NULL)
+ return (FLM_REBUILD);
+ }
+
+ /* Dump */
+ result = sync_algo(fd);
+ if (result != FLM_SUCCESS)
+ return (result);
+ FD_PRINTF(fd, "DUMP completed successfully.");
+
+ MOD_LOCK();
+ TAILQ_INSERT_TAIL(&fib_data_list, fd, entries);
+ fd->fd_linked = true;
+ MOD_UNLOCK();
+
+ return (FLM_SUCCESS);
+}
+
+/*
+ * Sets up algo @flm for table @rh and, if @attach is set, links it to
+ * the datapath.
+ */
+static enum flm_op_result
+setup_instance(struct fib_lookup_module *flm, struct rib_head *rh,
+ struct fib_data *orig_fd, struct fib_data **pfd, bool attach)
+{
+ struct fib_data *prev_fd, *new_fd;
+ struct epoch_tracker et;
+ enum flm_op_result result;
+
+ prev_fd = orig_fd;
+ new_fd = NULL;
+ for (int i = 0; i < FIB_MAX_TRIES; i++) {
+ NET_EPOCH_ENTER(et);
+ result = try_setup_instance(flm, rh, prev_fd, &new_fd);
+
+ if ((result == FLM_SUCCESS) && attach)
+ result = switch_algo(new_fd);
+
+ if ((prev_fd != NULL) && (prev_fd != orig_fd)) {
+ schedule_destroy_instance(prev_fd, false);
+ prev_fd = NULL;
+ }
+ NET_EPOCH_EXIT(et);
+
+ RH_PRINTF(rh, "try %d: fib algo result: %d", i, result);
+
+ if (result == FLM_REBUILD) {
+ prev_fd = new_fd;
+ new_fd = NULL;
+ continue;
+ }
+
+ break;
+ }
+
+ if (result != FLM_SUCCESS) {
+ /* update failure count */
+ MOD_LOCK();
+ if (orig_fd != NULL)
+ orig_fd->fd_failed_rebuilds++;
+ MOD_UNLOCK();
+
+ NET_EPOCH_ENTER(et);
+ if ((prev_fd != NULL) && (prev_fd != orig_fd))
+ schedule_destroy_instance(prev_fd, false);
+ if (new_fd != NULL) {
+ schedule_destroy_instance(new_fd, false);
+ new_fd = NULL;
+ }
+ NET_EPOCH_EXIT(et);
+ }
+
+ *pfd = new_fd;
+ return (result);
+}
+
+static void
+rebuild_callout(void *_data)
+{
+ struct fib_data *fd, *fd_new;
+ struct fib_lookup_module *flm_new;
+ struct epoch_tracker et;
+ enum flm_op_result result;
+ bool need_rebuild = false;
+
+ fd = (struct fib_data *)_data;
+
+ MOD_LOCK();
+ need_rebuild = fd->fd_need_rebuild;
+ fd->fd_need_rebuild = false;
+ fd->fd_force_eval = false;
+ fd->fd_num_changes = 0;
+ MOD_UNLOCK();
+
+ CURVNET_SET(fd->fd_vnet);
+
+ /* First, check if we're still OK to use this algo */
+ flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm);
+ if ((flm_new == NULL) && (!need_rebuild)) {
+ /* Keep existing algo, no need to rebuild. */
+ CURVNET_RESTORE();
+ return;
+ }
+
+ struct fib_lookup_module *flm = (flm_new != NULL) ? flm_new : fd->fd_flm;
+ struct fib_data *fd_tmp = (flm_new == NULL) ? fd : NULL;
+ result = setup_instance(flm, fd->fd_rh, fd_tmp, &fd_new, true);
+ if (result != FLM_SUCCESS) {
+ FD_PRINTF(fd, "table rebuild failed");
+ CURVNET_RESTORE();
+ return;
+ }
+ FD_PRINTF(fd_new, "switched to new instance");
+
+ /* Remove old */
+ if (fd != NULL) {
+ NET_EPOCH_ENTER(et);
+ schedule_destroy_instance(fd, true);
+ NET_EPOCH_EXIT(et);
+ }
+
+ CURVNET_RESTORE();
+}
+
+static int
+set_algo_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error = 0;
+#if 0
+ struct epoch_tracker et;
+ struct fib_lookup_module *flm;
+ struct fib_data *old_fd, *fd;
+ char old_algo_name[32], algo_name[32];
+ uint32_t fibnum;
+ int error;
+
+ fibnum = RT_DEFAULT_FIB;
+
+ if (old_fd == NULL) {
+ strlcpy(old_algo_name, "radix", sizeof(old_algo_name));
+ } else {
+ strlcpy(old_algo_name, fd_ptr->fd_flm->flm_name,
+ sizeof(old_algo_name));
+ }
+ strlcpy(algo_name, old_algo_name, sizeof(algo_name));
+ error = sysctl_handle_string(oidp, algo_name, sizeof(algo_name), req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (strcmp(algo_name, old_algo_name) == 0)
+ return (0);
+
+ if (strcmp(algo_name, "radix") == 0) {
+ /* teardown old one */
+ NET_EPOCH_ENTER(et);
+ MOD_LOCK();
+ old_fd = fd_ptr;
+ fd_ptr = NULL;
+ MOD_UNLOCK();
+
+ if (old_fd != NULL)
+ schedule_destroy_instance(old_fd);
+ NET_EPOCH_EXIT(et);
+ return (0);
+ }
+
+ MOD_LOCK();
+ flm = find_algo(algo_name, AF_INET6);
+ if (flm != NULL)
+ flm->flm_refcount++;
+ MOD_UNLOCK();
+
+ if (flm == NULL) {
+ DPRINTF("unable to find algo %s", algo_name);
+ return (ESRCH);
+ }
+ DPRINTF("inet6.%u: requested fib algo %s", fibnum, algo_name);
+
+ fd = setup_instance(flm, fibnum, NULL, &error);
+
+ if (error != 0) {
+ MOD_LOCK();
+ flm->flm_refcount--;
+ MOD_UNLOCK();
+ return (error);
+ }
+
+ MOD_LOCK();
+ old_fd = fd_ptr;
+ fd_ptr = fd;
+ MOD_UNLOCK();
+
+ /* Remove old */
+ NET_EPOCH_ENTER(et);
+ if (old_fd != NULL) {
+ error = schedule_destroy_instance(old_fd);
+ }
+ NET_EPOCH_EXIT(et);
+#endif
+
+ /* Set new */
+
+ /* Drain callbacks so the user can unload the module afterwards, if desired */
+ epoch_drain_callbacks(net_epoch_preempt);
+
+ return (error);
+}
+SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo,
+ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
+ set_algo_sysctl_handler, "A",
+ "Set");
+
+static void
+destroy_fdh_epoch(epoch_context_t ctx)
+{
+ struct fib_dp_header *ffi;
+
+ ffi = __containerof(ctx, struct fib_dp_header, ffi_epoch_ctx);
+ free(ffi, M_RTABLE);
+}
+
+static struct fib_dp_header *
+alloc_fib_dp_array(uint32_t num_tables, bool waitok)
+{
+ size_t sz;
+ struct fib_dp_header *ffi;
+
+ sz = sizeof(struct fib_dp_header);
+ sz += sizeof(struct fib_dp) * num_tables;
+ ffi = malloc(sz, M_RTABLE, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO);
+ if (ffi != NULL)
+ ffi->ffi_num_tables = num_tables;
+ return (ffi);
+}
+
+static struct fib_dp_header *
+get_fib_dp_header(struct fib_dp *dp)
+{
+
+ return (__containerof((void *)dp, struct fib_dp_header, ffi_idx));
+}
+
+/*
+ * Replace per-family index pool @pdp with a new one which
+ * contains updated callback/algo data from @fd.
+ * Returns FLM_SUCCESS on success.
+ */
+static enum flm_op_result
+replace_rtables_family(struct fib_dp **pdp, struct fib_data *fd)
+{
+ struct fib_dp_header *new_ffi, *old_ffi;
+
+ NET_EPOCH_ASSERT();
+
+ FD_PRINTF(fd, "[vnet %p] replace with f:%p arg:%p", curvnet, fd->fd_dp.f, fd->fd_dp.arg);
+
+ MOD_LOCK();
+ old_ffi = get_fib_dp_header(*pdp);
+ new_ffi = alloc_fib_dp_array(old_ffi->ffi_num_tables, false);
+ FD_PRINTF(fd, "OLD FFI: %p NEW FFI: %p", old_ffi, new_ffi);
+ if (new_ffi == NULL) {
+ MOD_UNLOCK();
+ FD_PRINTF(fd, "error attaching datapath");
+ return (FLM_REBUILD);
+ }
+
+ memcpy(&new_ffi->ffi_idx[0], &old_ffi->ffi_idx[0],
+ old_ffi->ffi_num_tables * sizeof(struct fib_dp));
+ /* Update relevant data structure for @fd */
+ new_ffi->ffi_idx[fd->fd_fibnum] = fd->fd_dp;
+
+ /* Ensure memcpy() writes have completed */
+ atomic_thread_fence_rel();
+ /* Set new datapath pointer */
+ *pdp = &new_ffi->ffi_idx[0];
+ MOD_UNLOCK();
+ FD_PRINTF(fd, "update %p -> %p", old_ffi, new_ffi);
+
+ epoch_call(net_epoch_preempt, destroy_fdh_epoch,
+ &old_ffi->ffi_epoch_ctx);
+
+ return (FLM_SUCCESS);
+}
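+
+/*
+ * Reader-side sketch: datapath lookups are expected to run inside the
+ * network epoch and index the current array directly, e.g.
+ *
+ *	dp = &V_inet_dp[fibnum];
+ *	nh = dp->f(dp->arg, key, scopeid);
+ *
+ * which is why the old array above may only be freed via epoch_call()
+ * once all in-flight readers have drained.
+ */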
+
+static struct fib_dp **
+get_family_ptr(int family)
+{
+ switch (family) {
+ case AF_INET:
+ return (&V_inet_dp);
+ case AF_INET6:
+ return (&V_inet6_dp);
+ }
+ return (NULL);
+}
+
+static enum flm_op_result
+switch_algo(struct fib_data *fd)
+{
+ struct fib_dp **pdp;
+
+ pdp = get_family_ptr(fd->fd_family);
+ return (replace_rtables_family(pdp, fd));
+}
+
+/*
+ * Grow datapath pointers array.
+ * Called from sysctl handler on growing number of routing tables.
+ */
+static void
+grow_rtables_family(struct fib_dp **pdp, uint32_t new_num_tables)
+{
+ struct fib_dp_header *new_fdh, *old_fdh = NULL;
+
+ new_fdh = alloc_fib_dp_array(new_num_tables, true);
+
+ MOD_LOCK();
+ if (*pdp != NULL) {
+ old_fdh = get_fib_dp_header(*pdp);
+ memcpy(&new_fdh->ffi_idx[0], &old_fdh->ffi_idx[0],
+ old_fdh->ffi_num_tables * sizeof(struct fib_dp));
+ }
+
+ /* Wait till all writes completed */
+ atomic_thread_fence_rel();
+
+ *pdp = &new_fdh->ffi_idx[0];
+ MOD_UNLOCK();
+
+ if (old_fdh != NULL)
+ epoch_call(net_epoch_preempt, destroy_fdh_epoch,
+ &old_fdh->ffi_epoch_ctx);
+}
+
+/*
+ * Grows per-AF arrays of datapath pointers for each supported family.
+ * Called from fibs resize sysctl handler.
+ */
+void
+fib_grow_rtables(uint32_t new_num_tables)
+{
+
+ grow_rtables_family(get_family_ptr(AF_INET), new_num_tables);
+ grow_rtables_family(get_family_ptr(AF_INET6), new_num_tables);
+}
+
+void
+fib_get_rtable_info(struct rib_head *rh, struct rib_rtable_info *rinfo)
+{
+
+ bzero(rinfo, sizeof(struct rib_rtable_info));
+ rinfo->num_prefixes = rh->rnh_prefixes;
+ rinfo->num_nhops = nhops_get_count(rh);
+ rinfo->num_nhgrp = nhgrp_get_count(rh);
+}
+
+struct rib_head *
+fib_get_rh(struct fib_data *fd)
+{
+
+ return (fd->fd_rh);
+}
+
+static uint32_t
+get_nhop_idx(struct nhop_object *nh)
+{
+#ifdef ROUTE_MPATH
+ if (NH_IS_NHGRP(nh))
+ return (nhgrp_get_idx((struct nhgrp_object *)nh) * 2 - 1);
+ else
+ return (nhop_get_idx(nh) * 2);
+#else
+ return (nhop_get_idx(nh));
+#endif
+}
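+
+/*
+ * With ROUTE_MPATH the mapping above interleaves two index spaces:
+ * nexthop index N maps to slot 2N (even) and nexthop group index N to
+ * slot 2N - 1 (odd), e.g. nhop 3 -> 6 and nhgrp 3 -> 5, so both kinds
+ * can share the same nh_idx array without collisions.
+ */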
+
+
+uint32_t
+fib_get_nhop_idx(struct fib_data *fd, struct nhop_object *nh)
+{
+ uint32_t idx = get_nhop_idx(nh);
+
+ if (idx >= fd->number_nhops) {
+ fd->hit_nhops = 1;
+ return (0);
+ }
+
+ if (fd->nh_idx[idx] == NULL) {
+ nhop_ref_any(nh);
+ fd->nh_idx[idx] = nh;
+ fd->nh_ref_table->count++;
+ FD_PRINTF(fd, " REF nhop %u %p", idx, fd->nh_idx[idx]);
+ }
+ fd->nh_ref_table->refcnt[idx]++;
+
+ return (idx);
+}
+
+struct nhop_release_data {
+ struct nhop_object *nh;
+ struct epoch_context ctx;
+};
+
+static void
+release_nhop_epoch(epoch_context_t ctx)
+{
+ struct nhop_release_data *nrd;
+
+ nrd = __containerof(ctx, struct nhop_release_data, ctx);
+ nhop_free_any(nrd->nh);
+ free(nrd, M_RTABLE);
+}
+
+static void
+fib_schedule_release_nhop(struct fib_data *fd, struct nhop_object *nh)
+{
+ struct nhop_release_data *nrd;
+
+ nrd = malloc(sizeof(struct nhop_release_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (nrd != NULL) {
+ nrd->nh = nh;
+ epoch_call(net_epoch_preempt, release_nhop_epoch, &nrd->ctx);
+ } else {
+ /*
+ * Unable to allocate memory. Leak the nexthop to maintain the
+ * guarantee that each nhop index in the datapath always points
+ * to a valid nexthop.
+ */
+ FD_PRINTF(fd, "unable to allocate structure for nhop %p deletion", nh);
+ }
+}
+
+void
+fib_free_nhop_idx(struct fib_data *fd, uint32_t idx)
+{
+
+ KASSERT((idx < fd->number_nhops), ("invalid nhop index"));
+
+ fd->nh_ref_table->refcnt[idx]--;
+ if (fd->nh_ref_table->refcnt[idx] == 0) {
+ FD_PRINTF(fd, " FREE nhop %d %p", idx, fd->nh_idx[idx]);
+ fib_schedule_release_nhop(fd, fd->nh_idx[idx]);
+ }
+}
+
+void
+fib_free_nhop(struct fib_data *fd, struct nhop_object *nh)
+{
+
+ fib_free_nhop_idx(fd, get_nhop_idx(nh));
+}
+
+struct nhop_object **
+fib_get_nhop_array(struct fib_data *fd)
+{
+
+ return (fd->nh_idx);
+}
+
+static struct fib_lookup_module *
+fib_check_best_algo(struct rib_head *rh, struct fib_lookup_module *orig_flm)
+{
+ uint8_t preference, curr_preference = 0, best_preference = 0;
+ struct fib_lookup_module *flm, *best_flm = NULL;
+ struct rib_rtable_info rinfo;
+ int candidate_algos = 0;
+
+ fib_get_rtable_info(rh, &rinfo);
+
+ MOD_LOCK();
+ TAILQ_FOREACH(flm, &all_algo_list, entries) {
+ if (flm->flm_family != rh->rib_family)
+ continue;
+ candidate_algos++;
+ preference = flm->flm_get_pref(&rinfo);
+ if (preference > best_preference) {
+ best_preference = preference;
+ best_flm = flm;
+ }
+ if (flm == orig_flm)
+ curr_preference = preference;
+ }
+ if (best_flm != NULL && best_flm != orig_flm) {
+ /* Switch only if the candidate is sufficiently better */
+ if (curr_preference + BEST_DIFF_PERCENT < best_preference)
+ best_flm->flm_refcount++;
+ else
+ best_flm = NULL;
+ } else
+ best_flm = NULL;
+ MOD_UNLOCK();
+
+ RH_PRINTF(rh, "candidate_algos: %d, curr: %s(%d) result: %s(%d)",
+ candidate_algos, orig_flm ? orig_flm->flm_name : "NULL", curr_preference,
+ best_flm ? best_flm->flm_name : "NULL", best_preference);
+
+ return (best_flm);
+}
+
+/*
+ * Called when new route table is created.
+ * Selects, allocates and attaches fib algo for the table.
+ */
+int
+fib_select_algo_initial(struct rib_head *rh)
+{
+ struct fib_lookup_module *flm;
+ struct fib_data *fd = NULL;
+ enum flm_op_result result;
+
+ flm = fib_check_best_algo(rh, NULL);
+ if (flm == NULL) {
+ RH_PRINTF(rh, "no algo selected");
+ return (ENOENT);
+ }
+ RH_PRINTF(rh, "selected algo %s", flm->flm_name);
+
+ result = setup_instance(flm, rh, NULL, &fd, false);
+ RH_PRINTF(rh, "result=%d fd=%p", result, fd);
+ if (result == FLM_SUCCESS) {
+ /*
+ * Attach datapath directly to avoid N reallocations
+ * during fib growth
+ */
+ struct fib_dp_header *fdp;
+ struct fib_dp **pdp;
+
+ pdp = get_family_ptr(rh->rib_family);
+ if (pdp != NULL) {
+ fdp = get_fib_dp_header(*pdp);
+ fdp->ffi_idx[fd->fd_fibnum] = fd->fd_dp;
+ FD_PRINTF(fd, "datapath attached");
+ }
+ }
+
+ return (result == FLM_SUCCESS ? 0 : EINVAL);
+}
+
+int
+fib_module_register(struct fib_lookup_module *flm)
+{
+
+ MOD_LOCK();
+ RH_PRINTF_RAW("linking %s (%p)", flm->flm_name, flm);
+ TAILQ_INSERT_TAIL(&all_algo_list, flm, entries);
+ MOD_UNLOCK();
+
+ return (0);
+}
+
+int
+fib_module_unregister(struct fib_lookup_module *flm)
+{
+ MOD_LOCK();
+ if (flm->flm_refcount > 0) {
+ MOD_UNLOCK();
+ return (EBUSY);
+ }
+ RH_PRINTF_RAW("unlinking %s (%p)", flm->flm_name, flm);
+ TAILQ_REMOVE(&all_algo_list, flm, entries);
+ MOD_UNLOCK();
+
+ return (0);
+}
+
+int
+fib_module_clone(const struct fib_lookup_module *flm_orig,
+ struct fib_lookup_module *flm, bool waitok)
+{
+
+ return (0);
+}
+
+int
+fib_module_dumptree(struct fib_lookup_module *flm,
+ enum rib_subscription_type subscription_type)
+{
+
+ return (0);
+}
+
+static void
+fib_algo_init(void)
+{
+
+ mtx_init(&fib_mtx, "algo list mutex", NULL, MTX_DEF);
+ TAILQ_INIT(&all_algo_list);
+}
+SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, fib_algo_init, NULL);
+
Index: sys/net/route/route_ctl.h
===================================================================
--- sys/net/route/route_ctl.h
+++ sys/net/route/route_ctl.h
@@ -72,6 +72,8 @@
void *arg);
void rib_walk_ext(uint32_t fibnum, int af, bool wlock, rib_walktree_f_t *wa_f,
rib_walk_hook_f_t *hook_f, void *arg);
+void rib_walk_ext_internal(struct rib_head *rnh, bool wlock,
+ rib_walktree_f_t *wa_f, rib_walk_hook_f_t *hook_f, void *arg);
void rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f,
void *arg, bool report);
@@ -87,6 +89,10 @@
const struct rtentry *rib_lookup_lpm(uint32_t fibnum, int family,
const struct sockaddr *dst, struct route_nhop_data *rnd);
+/* Nhops */
+void nhop_ref_any(struct nhop_object *nh);
+void nhop_free_any(struct nhop_object *nh);
+
/* Multipath */
struct nhgrp_object;
struct weightened_nhop;
@@ -109,6 +115,6 @@
struct rib_subscription *rib_subscribe_internal(struct rib_head *rnh,
rib_subscription_cb_t *f, void *arg, enum rib_subscription_type type,
bool waitok);
-int rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs);
+void rib_unsibscribe(struct rib_subscription *rs);
#endif
Index: sys/net/route/route_ctl.c
===================================================================
--- sys/net/route/route_ctl.c
+++ sys/net/route/route_ctl.c
@@ -70,6 +70,7 @@
CK_STAILQ_ENTRY(rib_subscription) next;
rib_subscription_cb_t *func;
void *arg;
+ struct rib_head *rnh;
enum rib_subscription_type type;
struct epoch_context epoch_ctx;
};
@@ -669,6 +670,8 @@
/* Finalize notification */
rnh->rnh_gen++;
+ rnh->rnh_prefixes--;
+
rc->rc_cmd = RTM_DELETE;
rc->rc_rt = rt;
rc->rc_nh_old = rt->rt_nhop;
@@ -929,6 +932,7 @@
/* Finalize notification */
rnh->rnh_gen++;
+ rnh->rnh_prefixes++;
rc->rc_cmd = RTM_ADD;
rc->rc_rt = rt;
@@ -984,6 +988,8 @@
/* Finalize notification */
rnh->rnh_gen++;
+ if (rnd->rnd_nhop == NULL)
+ rnh->rnh_prefixes--;
rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
rc->rc_rt = rt;
@@ -1222,7 +1228,7 @@
enum rib_subscription_type type, bool waitok)
{
struct rib_subscription *rs;
- int flags = M_ZERO | (waitok ? M_WAITOK : 0);
+ int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT);
rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
if (rs == NULL)
@@ -1246,22 +1252,14 @@
enum rib_subscription_type type, bool waitok)
{
struct rib_head *rnh;
- struct rib_subscription *rs;
struct epoch_tracker et;
- if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
- return (NULL);
-
NET_EPOCH_ENTER(et);
KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
rnh = rt_tables_get_rnh(fibnum, family);
-
- RIB_WLOCK(rnh);
- CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
- RIB_WUNLOCK(rnh);
NET_EPOCH_EXIT(et);
- return (rs);
+ return (rib_subscribe_internal(rnh, f, arg, type, waitok));
}
struct rib_subscription *
@@ -1273,6 +1271,7 @@
if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
return (NULL);
+ rs->rnh = rnh;
NET_EPOCH_ENTER(et);
RIB_WLOCK(rnh);
@@ -1284,23 +1283,15 @@
}
/*
- * Remove rtable subscription @rs from the table specified by @fibnum
- * and @family.
+ * Remove rtable subscription @rs from the routing table.
* Needs to be run in network epoch.
- *
- * Returns 0 on success.
*/
-int
-rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs)
+void
+rib_unsibscribe(struct rib_subscription *rs)
{
- struct rib_head *rnh;
+ struct rib_head *rnh = rs->rnh;
NET_EPOCH_ASSERT();
- KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
- rnh = rt_tables_get_rnh(fibnum, family);
-
- if (rnh == NULL)
- return (ENOENT);
RIB_WLOCK(rnh);
CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
@@ -1308,8 +1299,6 @@
epoch_call(net_epoch_preempt, destroy_subscription_epoch,
&rs->epoch_ctx);
-
- return (0);
}
/*
Index: sys/net/route/route_helpers.c
===================================================================
--- sys/net/route/route_helpers.c
+++ sys/net/route/route_helpers.c
@@ -77,14 +77,10 @@
* Table is traversed under read lock unless @wlock is set.
*/
void
-rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
+rib_walk_ext_internal(struct rib_head *rnh, bool wlock, rib_walktree_f_t *wa_f,
rib_walk_hook_f_t *hook_f, void *arg)
{
RIB_RLOCK_TRACKER;
- struct rib_head *rnh;
-
- if ((rnh = rt_tables_get_rnh(fibnum, family)) == NULL)
- return;
if (wlock)
RIB_WLOCK(rnh);
@@ -101,6 +97,16 @@
RIB_RUNLOCK(rnh);
}
+void
+rib_walk_ext(uint32_t fibnum, int family, bool wlock, rib_walktree_f_t *wa_f,
+ rib_walk_hook_f_t *hook_f, void *arg)
+{
+ struct rib_head *rnh;
+
+ if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
+ rib_walk_ext_internal(rnh, wlock, wa_f, hook_f, arg);
+}
+
/*
* Calls @wa_f with @arg for each entry in the table specified by
* @af and @fibnum.
Index: sys/net/route/route_tables.c
===================================================================
--- sys/net/route/route_tables.c
+++ sys/net/route/route_tables.c
@@ -171,7 +171,7 @@
grow_rtables(uint32_t num_tables)
{
struct domain *dom;
- struct rib_head **prnh;
+ struct rib_head **prnh, *rh;
struct rib_head **new_rt_tables, **old_rt_tables;
int family;
@@ -188,6 +188,8 @@
"by default. Consider tuning %s if needed\n",
"net.add_addr_allfibs");
+ fib_grow_rtables(num_tables);
+
/*
* Current rt_tables layout:
* fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX]..
@@ -206,10 +208,16 @@
prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
if (*prnh != NULL)
continue;
- *prnh = dom->dom_rtattach(i);
- if (*prnh == NULL)
- log(LOG_ERR, "unable to create routing tables for domain %d\n",
- dom->dom_family);
+ rh = dom->dom_rtattach(i);
+ if (rh == NULL) {
+ log(LOG_ERR, "unable to create routing table for %d.%d\n",
+ dom->dom_family, i);
+ continue;
+ }
+ if (fib_select_algo_initial(rh) != 0) {
+ log(LOG_ERR, "unable to select algo for table %d.%d\n",
+ dom->dom_family, i);
+ // TODO: detach table
+ }
+ *prnh = rh;
}
}
Index: sys/net/route/route_var.h
===================================================================
--- sys/net/route/route_var.h
+++ sys/net/route/route_var.h
@@ -68,8 +68,10 @@
struct vnet *rib_vnet; /* vnet pointer */
int rib_family; /* AF of the rtable */
u_int rib_fibnum; /* fib number */
+ bool rib_dying; /* rib is detaching */
struct callout expire_callout; /* Callout for expiring dynamic routes */
time_t next_expire; /* Next expire run ts */
+ uint32_t rnh_prefixes; /* Number of prefixes */
struct nh_control *nh_control; /* nexthop subsystem data */
CK_STAILQ_HEAD(, rib_subscription) rnh_subscribers;/* notification subscribers */
};
@@ -241,7 +243,6 @@
void nhops_destroy_rib(struct rib_head *rh);
void nhop_ref_object(struct nhop_object *nh);
int nhop_try_ref_object(struct nhop_object *nh);
-void nhop_free_any(struct nhop_object *nh);
void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type);
void nhop_set_rtflags(struct nhop_object *nh, int rt_flags);
@@ -253,6 +254,7 @@
void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu);
int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
+uint32_t nhops_get_count(struct rib_head *rh);
/* MULTIPATH */
#define MPF_MULTIPATH 0x08 /* need to be consistent with NHF_MULTIPATH */
@@ -295,6 +297,7 @@
/* nhgrp_ctl.c */
int nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
+uint32_t nhgrp_get_count(struct rib_head *rh);
int nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn,
int num_nhops, struct route_nhop_data *rnd);
@@ -306,7 +309,14 @@
struct route_nhop_data *rnd_new);
uint32_t nhgrp_get_idx(const struct nhgrp_object *nhg);
+void nhgrp_ref_object(struct nhgrp_object *nhg);
void nhgrp_free(struct nhgrp_object *nhg);
+
+/* route_algo.c */
+void fib_grow_rtables(uint32_t new_num_tables);
+int fib_select_algo_initial(struct rib_head *rh);
+void fib_destroy_rib(struct rib_head *rh);
/* Entropy data used for outbound hashing */
#define MPATH_ENTROPY_KEY_LEN 40
Index: sys/netinet/in_fib.h
===================================================================
--- sys/netinet/in_fib.h
+++ sys/netinet/in_fib.h
@@ -45,10 +45,15 @@
struct sockaddr_in ro_dst4;
};
+struct rtentry;
+struct route_nhop_data;
+
struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst,
uint32_t scopeid, uint32_t flags, uint32_t flowid);
int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, const struct ifnet *src_if);
+struct rtentry *fib4_lookup_rt(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags, struct route_nhop_data *nrd);
struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst,
uint32_t scopeid, uint32_t flags);
uint32_t fib4_calc_software_hash(struct in_addr src, struct in_addr dst,
Index: sys/netinet/in_fib.c
===================================================================
--- sys/netinet/in_fib.c
+++ sys/netinet/in_fib.c
@@ -49,6 +49,7 @@
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
#include <net/route/nhop.h>
#include <net/toeplitz.h>
#include <net/vnet.h>
@@ -63,6 +64,10 @@
/* Assert 'struct route_in' is compatible with 'struct route' */
CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4);
+#ifdef ROUTE_ALGO
+VNET_DEFINE(struct fib_dp *, inet_dp);
+#endif
+
#ifdef ROUTE_MPATH
struct _hash_5tuple_ipv4 {
struct in_addr src;
@@ -75,7 +80,6 @@
_Static_assert(sizeof(struct _hash_5tuple_ipv4) == 16,
"_hash_5tuple_ipv4 size is wrong");
-
uint32_t
fib4_calc_software_hash(struct in_addr src, struct in_addr dst,
unsigned short src_port, unsigned short dst_port, char proto,
@@ -104,6 +108,29 @@
* one needs to pass NHR_REF as a flag. This will return referenced
* nexthop.
*/
+#ifdef ROUTE_ALGO
+struct nhop_object *
+fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags, uint32_t flowid)
+{
+ struct nhop_object *nh;
+ struct fib_dp *dp = &V_inet_dp[fibnum];
+ struct flm_lookup_key key = {.addr4 = dst };
+
+ nh = dp->f(dp->arg, key, scopeid);
+ if (nh != NULL) {
+ nh = nhop_select(nh, flowid);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ if (flags & NHR_REF)
+ nhop_ref_object(nh);
+ return (nh);
+ }
+ }
+ RTSTAT_INC(rts_unreach);
+ return (NULL);
+}
+#else
struct nhop_object *
fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
uint32_t flags, uint32_t flowid)
@@ -143,6 +170,7 @@
RTSTAT_INC(rts_unreach);
return (NULL);
}
+#endif
inline static int
check_urpf_nhop(const struct nhop_object *nh, uint32_t flags,
@@ -181,28 +209,19 @@
return (check_urpf_nhop(nh, flags, src_if));
}
-/*
- * Performs reverse path forwarding lookup.
- * If @src_if is non-zero, verifies that at least 1 path goes via
- * this interface.
- * If @src_if is zero, verifies that route exist.
- * if @flags contains NHR_NOTDEFAULT, do not consider default route.
- *
- * Returns 1 if route matching conditions is found, 0 otherwise.
- */
-int
-fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
- uint32_t flags, const struct ifnet *src_if)
+#ifndef ROUTE_ALGO
+static struct nhop_object *
+lookup_nhop(uint32_t fibnum, struct in_addr dst, uint32_t scopeid)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
- int ret;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
if (rh == NULL)
- return (0);
+ return (NULL);
/* Prepare lookup key */
struct sockaddr_in sin4;
@@ -210,49 +229,94 @@
sin4.sin_len = sizeof(struct sockaddr_in);
sin4.sin_addr = dst;
+ nh = NULL;
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
- if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- ret = check_urpf(RNTORT(rn)->rt_nhop, flags, src_if);
- RIB_RUNLOCK(rh);
- return (ret);
- }
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
+ nh = RNTORT(rn)->rt_nhop;
RIB_RUNLOCK(rh);
+ return (nh);
+}
+#endif
+
+/*
+ * Performs reverse path forwarding lookup.
+ * If @src_if is non-zero, verifies that at least 1 path goes via
+ * this interface.
+ * If @src_if is zero, verifies that route exist.
+ * if @flags contains NHR_NOTDEFAULT, do not consider default route.
+ *
+ * Returns 1 if route matching conditions is found, 0 otherwise.
+ */
+int
+fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags, const struct ifnet *src_if)
+{
+ struct nhop_object *nh;
+#ifdef ROUTE_ALGO
+ struct fib_dp *dp = &V_inet_dp[fibnum];
+ struct flm_lookup_key key = {.addr4 = dst };
+
+ nh = dp->f(dp->arg, key, scopeid);
+#else
+ nh = lookup_nhop(fibnum, dst, scopeid);
+#endif
+ if (nh != NULL)
+ return (check_urpf(nh, flags, src_if));
+
return (0);
}
-struct nhop_object *
-fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
- uint32_t flags)
+struct rtentry *
+fib4_lookup_rt(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags, struct route_nhop_data *rnd)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
- struct nhop_object *nh;
+ struct rtentry *rt;
- KASSERT((fibnum < rt_numfibs), ("fib4_lookup_debugnet: bad fibnum"));
+ KASSERT((fibnum < rt_numfibs), ("fib4_lookup_rt: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
if (rh == NULL)
return (NULL);
/* Prepare lookup key */
- struct sockaddr_in sin4;
- memset(&sin4, 0, sizeof(sin4));
- sin4.sin_family = AF_INET;
- sin4.sin_len = sizeof(struct sockaddr_in);
- sin4.sin_addr = dst;
-
- nh = NULL;
- /* unlocked lookup */
+ struct sockaddr_in sin4 = {
+ .sin_family = AF_INET,
+ .sin_len = sizeof(struct sockaddr_in),
+ .sin_addr = dst,
+ };
+
+ rt = NULL;
+ if (!(flags & NHR_UNLOCKED))
+ RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- nh = nhop_select((RNTORT(rn))->rt_nhop, 0);
+ rt = (struct rtentry *)rn;
+ rnd->rnd_nhop = rt->rt_nhop;
+ rnd->rnd_weight = rt->rt_weight;
+ }
+ if (!(flags & NHR_UNLOCKED))
+ RIB_RUNLOCK(rh);
+
+ return (rt);
+}
+
+struct nhop_object *
+fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags)
+{
+ struct rtentry *rt;
+ struct route_nhop_data rnd;
+
+ rt = fib4_lookup_rt(fibnum, dst, scopeid, NHR_UNLOCKED, &rnd);
+ if (rt != NULL) {
+ struct nhop_object *nh = nhop_select(rnd.rnd_nhop, 0);
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(nh->nh_ifp)) {
- if (flags & NHR_REF)
- nhop_ref_object(nh);
+ if (RT_LINK_IS_UP(nh->nh_ifp))
return (nh);
- }
}
return (NULL);
Index: sys/netinet/in_fib_algo.c
===================================================================
--- /dev/null
+++ sys/netinet/in_fib_algo.c
@@ -0,0 +1,315 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/priv.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <net/vnet.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
+
+
+#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
+#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr))
+struct radix4_addr_entry {
+ struct radix_node rn[2];
+ struct sockaddr_in addr;
+ struct nhop_object *nhop;
+};
+#define LRADIX4_ITEM_SZ roundup2(sizeof(struct radix4_addr_entry), 64)
+
+struct lradix4_data {
+ struct radix_node_head *rnh;
+ struct fib_data *fd;
+ void *mem;
+ uint32_t alloc_items;
+ uint32_t num_items;
+};
+
+static struct nhop_object *
+lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
+ struct radix4_addr_entry *ent;
+ struct sockaddr_in addr4 = {
+ .sin_len = KEY_LEN_INET,
+ .sin_addr = key.addr4,
+ };
+ ent = (struct radix4_addr_entry *)(rnh->rnh_matchaddr(&addr4, &rnh->rh));
+ if (ent != NULL)
+ return (ent->nhop);
+ return (NULL);
+}
+
+static uint8_t
+lradix4_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ if (rinfo->num_prefixes < 10)
+ return (255);
+ else if (rinfo->num_prefixes < 100000)
+ return (255 - rinfo->num_prefixes / 394);
+ else
+ return (1);
+}
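+
+/*
+ * Sample values for the formula above: ~1k prefixes -> preference 253,
+ * 50k -> 129, just under 100k -> 2; beyond 100k prefixes this algo
+ * becomes a last resort (preference 1).
+ */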
+
+static enum flm_op_result
+lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
+{
+ struct lradix4_data *lr;
+ struct rib_rtable_info rinfo;
+ uint32_t count;
+
+ lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (lr == NULL)
+ return (FLM_REBUILD);
+ if (!rn_inithead((void **)&lr->rnh, OFF_LEN_INET)) {
+ free(lr, M_RTABLE);
+ return (FLM_REBUILD);
+ }
+ fib_get_rtable_info(fib_get_rh(fd), &rinfo);
+
+ count = rinfo.num_prefixes * 11 / 10;
+ // XXX: alignment!
+ lr->mem = malloc(count * LRADIX4_ITEM_SZ, M_RTABLE, M_NOWAIT | M_ZERO);
+ if (lr->mem == NULL) {
+ /* Avoid leaking the partially initialized instance */
+ rn_detachhead((void **)&lr->rnh);
+ free(lr, M_RTABLE);
+ return (FLM_REBUILD);
+ }
+ lr->alloc_items = count;
+ lr->fd = fd;
+
+ *_data = lr;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+lradix4_destroy(void *_data)
+{
+ struct lradix4_data *lr = (struct lradix4_data *)_data;
+
+ if (lr->rnh != NULL)
+ rn_detachhead((void **)&lr->rnh);
+ if (lr->mem != NULL)
+ free(lr->mem, M_RTABLE);
+ free(lr, M_RTABLE);
+}
+
+static enum flm_op_result
+lradix4_add_route_cb(struct rtentry *rt, void *_data)
+{
+ struct lradix4_data *lr = (struct lradix4_data *)_data;
+ struct radix4_addr_entry *ae;
+ struct sockaddr_in *rt_dst, *rt_mask, mask;
+ struct radix_node *rn;
+
+ if (fib_get_nhop_idx(lr->fd, rt->rt_nhop) == 0)
+ return (FLM_REBUILD);
+
+ if (lr->num_items >= lr->alloc_items)
+ return (FLM_REBUILD);
+
+ ae = (struct radix4_addr_entry *)((char *)lr->mem + lr->num_items * LRADIX4_ITEM_SZ);
+ lr->num_items++;
+
+ ae->nhop = rt->rt_nhop;
+
+ rt_dst = (struct sockaddr_in *)rt_key(rt);
+ rt_mask = (struct sockaddr_in *)rt_mask(rt);
+
+ ae->addr.sin_len = KEY_LEN_INET;
+ ae->addr.sin_addr = rt_dst->sin_addr;
+
+ if (rt_mask != NULL) {
+ bzero(&mask, sizeof(mask));
+ mask.sin_len = KEY_LEN_INET;
+ mask.sin_addr = rt_mask->sin_addr;
+ rt_mask = &mask;
+ }
+
+ rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr,
+ (struct sockaddr *)rt_mask, &lr->rnh->rh, ae->rn);
+ if (rn == NULL)
+ return (FLM_REBUILD);
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+lradix4_end_dump(void *_data, struct fib_dp *dp)
+{
+ struct lradix4_data *lr = (struct lradix4_data *)_data;
+
+ dp->f = lradix4_lookup;
+ dp->arg = lr->rnh;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+
+ return (FLM_REBUILD);
+}
+
+struct fib_lookup_module flm_radix4_lockless = {
+ .flm_name = "radix4_lockless",
+ .flm_family = AF_INET,
+ .flm_init_cb = lradix4_init,
+ .flm_destroy_cb = lradix4_destroy,
+ .flm_dump_rib_item_cb = lradix4_add_route_cb,
+ .flm_dump_end_cb = lradix4_end_dump,
+ .flm_change_rib_item_cb = lradix4_change_cb,
+ .flm_get_pref = lradix4_get_pref,
+};
+
+
+struct radix4_data {
+ struct fib_data *fd;
+ struct rib_head *rh;
+};
+
+static struct nhop_object *
+radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ RIB_RLOCK_TRACKER;
+ struct rib_head *rh = (struct rib_head *)algo_data;
+ struct radix_node *rn;
+ struct nhop_object *nh;
+
+ /* Prepare lookup key */
+ struct sockaddr_in sin4 = {
+ .sin_family = AF_INET,
+ .sin_len = sizeof(struct sockaddr_in),
+ .sin_addr = key.addr4,
+ };
+
+ nh = NULL;
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
+ nh = (RNTORT(rn))->rt_nhop;
+ RIB_RUNLOCK(rh);
+
+ return (nh);
+}
+
+static uint8_t
+radix4_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ return (50);
+}
+
+static enum flm_op_result
+radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
+{
+ struct radix4_data *r4;
+
+ r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (r4 == NULL)
+ return (FLM_REBUILD);
+ r4->fd = fd;
+ r4->rh = fib_get_rh(fd);
+ if (r4->rh == NULL) {
+ free(r4, M_RTABLE);
+ return (FLM_ERROR);
+ }
+
+ *_data = r4;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+radix4_destroy(void *_data)
+{
+
+ free(_data, M_RTABLE);
+}
+
+static enum flm_op_result
+radix4_end_dump(void *_data, struct fib_dp *dp)
+{
+ struct radix4_data *r4 = (struct radix4_data *)_data;
+
+ dp->f = radix4_lookup;
+ dp->arg = r4->rh;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+ struct radix4_data *r4 = (struct radix4_data *)_data;
+
+ /*
+ * Grab additional reference for each nexthop to maintain guarantee
+ * that we have non-zero # of reference for each nexthop in radix in
+ * the epoch.
+ */
+ if (rc->rc_nh_new != NULL) {
+ if (fib_get_nhop_idx(r4->fd, rc->rc_nh_new) == 0)
+ return (FLM_REBUILD);
+ }
+ if (rc->rc_nh_old != NULL)
+ fib_free_nhop(r4->fd, rc->rc_nh_old);
+
+ return (FLM_SUCCESS);
+}
+
+struct fib_lookup_module flm_radix4 = {
+ .flm_name = "radix4",
+ .flm_family = AF_INET,
+ .flm_init_cb = radix4_init,
+ .flm_destroy_cb = radix4_destroy,
+ .flm_dump_end_cb = radix4_end_dump,
+ .flm_change_rib_item_cb = radix4_change_cb,
+ .flm_get_pref = radix4_get_pref,
+};
+
+static void
+fib4_algo_init(void)
+{
+
+ fib_module_register(&flm_radix4_lockless);
+ fib_module_register(&flm_radix4);
+}
+SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL);
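+
+/*
+ * Sketch (assumption, not part of this change): a loadable algo module
+ * could follow the same pattern from its module event handler, e.g.:
+ *
+ *	case MOD_LOAD:
+ *		error = fib_module_register(&flm_example);
+ *		break;
+ *	case MOD_UNLOAD:
+ *		error = fib_module_unregister(&flm_example);
+ *		break;
+ *
+ * fib_module_unregister() returns EBUSY while instances still hold
+ * references to the module.
+ */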
Index: sys/netinet6/in6_fib.h
===================================================================
--- sys/netinet6/in6_fib.h
+++ sys/netinet6/in6_fib.h
@@ -32,11 +32,16 @@
#ifndef _NETINET6_IN6_FIB_H_
#define _NETINET6_IN6_FIB_H_
+struct rtentry;
+struct route_nhop_data;
+
struct nhop_object *fib6_lookup(uint32_t fibnum,
const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
uint32_t flowid);
int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
+struct rtentry *fib6_lookup_rt(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, struct route_nhop_data *rnd);
struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum,
const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags);
uint32_t fib6_calc_software_hash(const struct in6_addr *src,
Index: sys/netinet6/in6_fib.c
===================================================================
--- sys/netinet6/in6_fib.c
+++ sys/netinet6/in6_fib.c
@@ -50,6 +50,7 @@
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
#include <net/route/nhop.h>
#include <net/toeplitz.h>
#include <net/vnet.h>
@@ -69,6 +70,10 @@
CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst);
+#ifdef ROUTE_ALGO
+VNET_DEFINE(struct fib_dp *, inet6_dp);
+#endif
+
#ifdef ROUTE_MPATH
struct _hash_5tuple_ipv6 {
struct in6_addr src;
@@ -81,6 +86,7 @@
_Static_assert(sizeof(struct _hash_5tuple_ipv6) == 40,
"_hash_5tuple_ipv6 size is wrong");
+
uint32_t
fib6_calc_software_hash(const struct in6_addr *src, const struct in6_addr *dst,
unsigned short src_port, unsigned short dst_port, char proto,
@@ -111,6 +117,29 @@
* one needs to pass NHR_REF as a flag. This will return referenced
* nexthop.
*/
+#ifdef ROUTE_ALGO
+struct nhop_object *
+fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid)
+{
+ struct nhop_object *nh;
+ struct fib_dp *dp = &V_inet6_dp[fibnum];
+ struct flm_lookup_key key = {.addr6 = dst6 };
+
+ nh = dp->f(dp->arg, key, scopeid);
+ if (nh != NULL) {
+ nh = nhop_select(nh, flowid);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ if (flags & NHR_REF)
+ nhop_ref_object(nh);
+ return (nh);
+ }
+ }
+ RTSTAT_INC(rts_unreach);
+ return (NULL);
+}
+#else
struct nhop_object *
fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
uint32_t scopeid, uint32_t flags, uint32_t flowid)
@@ -119,7 +148,6 @@
struct rib_head *rh;
struct radix_node *rn;
struct nhop_object *nh;
- struct sockaddr_in6 sin6;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -127,11 +155,10 @@
return (NULL);
/* TODO: radix changes */
- //addr = *dst6;
- /* Prepare lookup key */
- memset(&sin6, 0, sizeof(sin6));
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = *dst6;
+ struct sockaddr_in6 sin6 = {
+ .sin6_len = sizeof(struct sockaddr_in6),
+ .sin6_addr = *dst6,
+ };
/* Assume scopeid is valid and embed it directly */
if (IN6_IS_SCOPE_LINKLOCAL(dst6))
@@ -154,6 +181,7 @@
RTSTAT_INC(rts_unreach);
return (NULL);
}
+#endif
inline static int
check_urpf_nhop(const struct nhop_object *nh, uint32_t flags,
@@ -192,60 +220,75 @@
return (check_urpf_nhop(nh, flags, src_if));
}
-/*
- * Performs reverse path forwarding lookup.
- * If @src_if is non-zero, verifies that at least 1 path goes via
- * this interface.
- * If @src_if is zero, verifies that route exist.
- * if @flags contains NHR_NOTDEFAULT, do not consider default route.
- *
- * Returns 1 if route matching conditions is found, 0 otherwise.
- */
-int
-fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
- uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
+#ifndef ROUTE_ALGO
+static struct nhop_object *
+lookup_nhop(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid)
{
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
- struct sockaddr_in6 sin6;
- int ret;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
if (rh == NULL)
- return (0);
+ return (NULL);
- /* TODO: radix changes */
/* Prepare lookup key */
- memset(&sin6, 0, sizeof(sin6));
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = *dst6;
+ struct sockaddr_in6 sin6 = {
+ .sin6_len = sizeof(struct sockaddr_in6),
+ .sin6_addr = *dst6,
+ };
/* Assume scopeid is valid and embed it directly */
if (IN6_IS_SCOPE_LINKLOCAL(dst6))
sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+ nh = NULL;
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
- if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- ret = check_urpf(RNTORT(rn)->rt_nhop, flags, src_if);
- RIB_RUNLOCK(rh);
- return (ret);
- }
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
+ nh = RNTORT(rn)->rt_nhop;
RIB_RUNLOCK(rh);
+ return (nh);
+}
+#endif
+
+/*
+ * Performs reverse path forwarding lookup.
+ * If @src_if is non-zero, verifies that at least 1 path goes via
+ * this interface.
+ * If @src_if is zero, verifies that a route exists.
+ * If @flags contains NHR_NOTDEFAULT, do not consider the default route.
+ *
+ * Returns 1 if a route matching the conditions is found, 0 otherwise.
+ */
+int
+fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
+{
+ struct nhop_object *nh;
+#ifdef ROUTE_ALGO
+ struct fib_dp *dp = &V_inet6_dp[fibnum];
+ struct flm_lookup_key key = { .addr6 = dst6 };
+
+ nh = dp->f(dp->arg, key, scopeid);
+#else
+ nh = lookup_nhop(fibnum, dst6, scopeid);
+#endif
+ if (nh != NULL)
+ return (check_urpf(nh, flags, src_if));
return (0);
}
-struct nhop_object *
-fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6,
- uint32_t scopeid, uint32_t flags)
+struct rtentry *
+fib6_lookup_rt(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, struct route_nhop_data *rnd)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
- struct nhop_object *nh;
- struct sockaddr_in6 sin6;
+ struct rtentry *rt;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -253,25 +296,43 @@
return (NULL);
/* TODO: radix changes */
- //addr = *dst6;
- /* Prepare lookup key */
- memset(&sin6, 0, sizeof(sin6));
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = *dst6;
+ struct sockaddr_in6 sin6 = {
+ .sin6_len = sizeof(struct sockaddr_in6),
+ .sin6_addr = *dst6,
+ };
/* Assume scopeid is valid and embed it directly */
if (IN6_IS_SCOPE_LINKLOCAL(dst6))
sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+ rt = NULL;
+ if (!(flags & NHR_UNLOCKED))
+ RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- nh = nhop_select((RNTORT(rn))->rt_nhop, 0);
+ rt = (struct rtentry *)rn;
+ rnd->rnd_nhop = rt->rt_nhop;
+ rnd->rnd_weight = rt->rt_weight;
+ }
+ if (!(flags & NHR_UNLOCKED))
+ RIB_RUNLOCK(rh);
+
+ return (rt);
+}
+
+struct nhop_object *
+fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags)
+{
+ struct rtentry *rt;
+ struct route_nhop_data rnd;
+
+ rt = fib6_lookup_rt(fibnum, dst6, scopeid, NHR_UNLOCKED, &rnd);
+ if (rt != NULL) {
+ struct nhop_object *nh = nhop_select(rnd.rnd_nhop, 0);
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(nh->nh_ifp)) {
- if (flags & NHR_REF)
- nhop_ref_object(nh);
+ if (RT_LINK_IS_UP(nh->nh_ifp))
return (nh);
- }
}
return (NULL);
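
The ROUTE_ALGO dispatch seen in fib6_lookup() above reduces a lookup to one indirect call through a struct fib_dp: a function pointer plus an opaque algorithm argument. A lookup module only has to supply a function of that shape, which the end-dump callbacks below wire in. A do-nothing sketch of the contract (null_lookup/null_attach are illustrative names; struct fib_dp and struct flm_lookup_key come from route_algo.h):

/* Illustrative only: the minimal shape of a dataplane lookup function. */
static struct nhop_object *
null_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
{

	return (NULL);	/* a real module consults its datastructure here */
}

static void
null_attach(struct fib_dp *dp)
{

	dp->f = null_lookup;	/* same slots the end-dump callbacks fill */
	dp->arg = NULL;
}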
Index: sys/netinet6/in6_fib_algo.c
===================================================================
--- /dev/null
+++ sys/netinet6/in6_fib_algo.c
@@ -0,0 +1,338 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <net/vnet.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <net/route/route_algo.h>
+#define RTDEBUG
+
+#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
+#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr))
+struct sa_in6 {
+ uint8_t sin6_len;
+ uint8_t sin6_family;
+ uint8_t pad[2];
+ struct in6_addr sin6_addr;
+};
+struct radix6_addr_entry {
+ struct radix_node rn[2];
+ struct sa_in6 addr;
+ struct nhop_object *nhop;
+};
+#define LRADIX6_ITEM_SZ roundup2(sizeof(struct radix6_addr_entry), 64)
+
+struct lradix6_data {
+ struct radix_node_head *rnh;
+ struct fib_data *fd;
+ void *mem;
+ uint32_t alloc_items;
+ uint32_t num_items;
+};
+
+static struct nhop_object *
+lradix6_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ struct radix_node_head *rnh = (struct radix_node_head *)algo_data;
+ struct radix6_addr_entry *ent;
+ /* The key must use the same compressed layout as the stored sa_in6. */
+ struct sa_in6 addr6 = {
+ .sin6_len = KEY_LEN_INET6,
+ .sin6_addr = *key.addr6,
+ };
+ ent = (struct radix6_addr_entry *)(rnh->rnh_matchaddr(&addr6, &rnh->rh));
+ if (ent != NULL)
+ return (ent->nhop);
+ return (NULL);
+}
+
+static uint8_t
+lradix6_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ /* Scale from 255 for tiny tables down to ~2 near 100k prefixes. */
+ if (rinfo->num_prefixes < 10)
+ return (255);
+ else if (rinfo->num_prefixes < 100000)
+ return (255 - rinfo->num_prefixes / 394);
+ else
+ return (1);
+}
+
+static enum flm_op_result
+lradix6_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
+{
+ struct lradix6_data *lr;
+ struct rib_rtable_info rinfo;
+ uint32_t count;
+
+ lr = malloc(sizeof(struct lradix6_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (lr == NULL)
+ return (FLM_REBUILD);
+ if (!rn_inithead((void **)&lr->rnh, OFF_LEN_INET6)) {
+ free(lr, M_RTABLE);
+ return (FLM_REBUILD);
+ }
+ fib_get_rtable_info(fib_get_rh(fd), &rinfo);
+
+ /* Provision ~10% headroom for routes added during the dump. */
+ count = rinfo.num_prefixes * 11 / 10;
+ /* XXX: malloc() does not guarantee 64-byte alignment of the items. */
+ lr->mem = malloc(count * LRADIX6_ITEM_SZ, M_RTABLE, M_NOWAIT | M_ZERO);
+ if (lr->mem == NULL) {
+ rn_detachhead((void **)&lr->rnh);
+ free(lr, M_RTABLE);
+ return (FLM_REBUILD);
+ }
+ lr->alloc_items = count;
+ lr->fd = fd;
+
+ *_data = lr;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+lradix6_destroy(void *_data)
+{
+ struct lradix6_data *lr = (struct lradix6_data *)_data;
+
+ if (lr->rnh != NULL)
+ rn_detachhead((void **)&lr->rnh);
+ if (lr->mem != NULL)
+ free(lr->mem, M_RTABLE);
+ free(lr, M_RTABLE);
+}
+
+static enum flm_op_result
+lradix6_add_route_cb(struct rtentry *rt, void *_data)
+{
+ struct lradix6_data *lr = (struct lradix6_data *)_data;
+ struct radix6_addr_entry *ae;
+ struct sockaddr_in6 *rt_dst, *rt_mask;
+ struct sa_in6 mask;
+ struct radix_node *rn;
+
+ if (fib_get_nhop_idx(lr->fd, rt->rt_nhop) == 0)
+ return (FLM_REBUILD);
+
+ if (lr->num_items >= lr->alloc_items)
+ return (FLM_REBUILD);
+
+ ae = (struct radix6_addr_entry *)((char *)lr->mem + lr->num_items * LRADIX6_ITEM_SZ);
+ lr->num_items++;
+
+ ae->nhop = rt->rt_nhop;
+
+ rt_dst = (struct sockaddr_in6 *)rt_key(rt);
+ rt_mask = (struct sockaddr_in6 *)rt_mask(rt);
+
+ ae->addr.sin6_len = KEY_LEN_INET6;
+ ae->addr.sin6_addr = rt_dst->sin6_addr;
+
+ if (rt_mask != NULL) {
+ bzero(&mask, sizeof(mask));
+ mask.sin6_len = KEY_LEN_INET6;
+ mask.sin6_addr = rt_mask->sin6_addr;
+ rt_mask = (struct sockaddr_in6 *)&mask;
+ }
+
+ rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr,
+ (struct sockaddr *)rt_mask, &lr->rnh->rh, ae->rn);
+ if (rn == NULL)
+ return (FLM_REBUILD);
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+lradix6_end_dump(void *_data, struct fib_dp *dp)
+{
+ struct lradix6_data *lr = (struct lradix6_data *)_data;
+
+ dp->f = lradix6_lookup;
+ dp->arg = lr->rnh;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+lradix6_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+
+ /* The lockless radix is immutable once built: rebuild on any change. */
+ return (FLM_REBUILD);
+}
+
+struct fib_lookup_module flm_radix6_lockless = {
+ .flm_name = "radix6_lockless",
+ .flm_family = AF_INET6,
+ .flm_init_cb = lradix6_init,
+ .flm_destroy_cb = lradix6_destroy,
+ .flm_dump_rib_item_cb = lradix6_add_route_cb,
+ .flm_dump_end_cb = lradix6_end_dump,
+ .flm_change_rib_item_cb = lradix6_change_cb,
+ .flm_get_pref = lradix6_get_pref,
+};
+
+struct radix6_data {
+ struct fib_data *fd;
+ struct rib_head *rh;
+};
+
+static struct nhop_object *
+radix6_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid)
+{
+ RIB_RLOCK_TRACKER;
+ struct rib_head *rh = (struct rib_head *)algo_data;
+ struct radix_node *rn;
+ struct nhop_object *nh;
+
+ /* Prepare lookup key */
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_len = sizeof(struct sockaddr_in6),
+ .sin6_addr = *key.addr6,
+ };
+ if (IN6_IS_SCOPE_LINKLOCAL(key.addr6))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ nh = NULL;
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0))
+ nh = (RNTORT(rn))->rt_nhop;
+ RIB_RUNLOCK(rh);
+
+ return (nh);
+}
+
+static uint8_t
+radix6_get_pref(const struct rib_rtable_info *rinfo)
+{
+
+ /* Fixed, low preference: always usable, rarely the best choice. */
+ return (50);
+}
+
+static enum flm_op_result
+radix6_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data)
+{
+ struct radix6_data *r6;
+
+ r6 = malloc(sizeof(struct radix6_data), M_RTABLE, M_NOWAIT | M_ZERO);
+ if (r6 == NULL)
+ return (FLM_REBUILD);
+ r6->fd = fd;
+ r6->rh = fib_get_rh(fd);
+ if (r6->rh == NULL) {
+ free(r6, M_RTABLE);
+ return (FLM_ERROR);
+ }
+
+ *_data = r6;
+
+ return (FLM_SUCCESS);
+}
+
+static void
+radix6_destroy(void *_data)
+{
+
+ free(_data, M_RTABLE);
+}
+
+static enum flm_op_result
+radix6_end_dump(void *_data, struct fib_dp *dp)
+{
+ struct radix6_data *r6 = (struct radix6_data *)_data;
+
+ dp->f = radix6_lookup;
+ dp->arg = r6->rh;
+
+ return (FLM_SUCCESS);
+}
+
+static enum flm_op_result
+radix6_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
+ void *_data)
+{
+ struct radix6_data *r6 = (struct radix6_data *)_data;
+
+ /*
+ * Grab an additional reference for each nexthop so that every nexthop
+ * reachable from the radix tree keeps a non-zero reference count for
+ * the duration of the epoch.
+ */
+ if (rc->rc_nh_new != NULL) {
+ if (fib_get_nhop_idx(r6->fd, rc->rc_nh_new) == 0)
+ return (FLM_REBUILD);
+ }
+ if (rc->rc_nh_old != NULL)
+ fib_free_nhop(r6->fd, rc->rc_nh_old);
+
+ return (FLM_SUCCESS);
+}
+
+struct fib_lookup_module flm_radix6 = {
+ .flm_name = "radix6",
+ .flm_family = AF_INET6,
+ .flm_init_cb = radix6_init,
+ .flm_destroy_cb = radix6_destroy,
+ .flm_dump_end_cb = radix6_end_dump,
+ .flm_change_rib_item_cb = radix6_change_cb,
+ .flm_get_pref = radix6_get_pref,
+};
+
+static void
+fib6_algo_init(void)
+{
+
+ fib_module_register(&flm_radix6_lockless);
+ fib_module_register(&flm_radix6);
+}
+SYSINIT(fib6_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib6_algo_init, NULL);
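
A final note on the sa_in6 shim defined near the top of in6_fib_algo.c: it exists purely to shrink the radix key. Dropping sockaddr_in6's port and flowinfo fields moves the address from byte offset 8 down to 4, so a key is 20 bytes and the radix head skips the first 32 bits (OFF_LEN_INET6 is a bit offset, which is what rn_inithead() expects). A quick userland check of those constants (struct in6_addr is stubbed here for illustration):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct in6_addr { uint8_t s6_addr[16]; };	/* stub for illustration */

struct sa_in6 {
	uint8_t		sin6_len;
	uint8_t		sin6_family;
	uint8_t		pad[2];
	struct in6_addr	sin6_addr;
};

#define	KEY_LEN_INET6	(offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
#define	OFF_LEN_INET6	(8 * offsetof(struct sa_in6, sin6_addr))

int
main(void)
{

	assert(offsetof(struct sa_in6, sin6_addr) == 4);
	printf("key len %zu bytes, skip %zu bits\n",
	    (size_t)KEY_LEN_INET6, (size_t)OFF_LEN_INET6);	/* 20 and 32 */
	return (0);
}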
