Page MenuHomeFreeBSD

D33658.id100573.diff
No OneTemporary

D33658.id100573.diff

Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4156,6 +4156,7 @@
net/route/nhgrp_ctl.c optional route_mpath
net/route/nhop.c standard
net/route/nhop_ctl.c standard
+net/route/nhop_neigh.c standard
net/route/nhop_utils.c standard
net/route/fib_algo.c optional fib_algo
net/route/route_ctl.c standard
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -337,11 +337,6 @@
SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
SX_RECURSE);
-#ifdef VIMAGE
-#define VNET_IS_SHUTTING_DOWN(_vnet) \
- ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
-#endif
-
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];
@@ -1122,7 +1117,7 @@
#ifdef VIMAGE
bool shutdown;
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
#endif
/*
@@ -1367,7 +1362,7 @@
}
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
if (shutdown) {
CURVNET_RESTORE();
prison_free(pr);
@@ -1425,7 +1420,7 @@
}
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
if (shutdown) {
CURVNET_RESTORE();
prison_free(pr);
@@ -2868,7 +2863,7 @@
CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
+ shutdown = VNET_IS_DYING(so->so_vnet);
if (shutdown) {
CURVNET_RESTORE();
return (EBUSY);
Index: sys/net/if_llatbl.c
===================================================================
--- sys/net/if_llatbl.c
+++ sys/net/if_llatbl.c
@@ -59,6 +59,7 @@
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
+#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
@@ -428,15 +429,19 @@
llentry_request_feedback(struct llentry *lle)
{
struct llentry *child_lle;
+ struct ifnet *ifp = lle->lle_tbl->llt_ifp;
+ int family = lle->lle_tbl->llt_af;
LLE_REQ_LOCK(lle);
lle->r_skip_req = 1;
LLE_REQ_UNLOCK(lle);
+ nhops_request_feedback(ifp, family, lle);
CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
LLE_REQ_LOCK(child_lle);
child_lle->r_skip_req = 1;
LLE_REQ_UNLOCK(child_lle);
+ nhops_request_feedback(ifp, family, child_lle);
}
}
@@ -462,13 +467,18 @@
static time_t
llentry_get_hittime_raw(struct llentry *lle)
{
- time_t lle_hittime = 0;
+ time_t lle_hittime = 0, nhops_hittime = 0;
LLE_REQ_LOCK(lle);
if ((lle->r_skip_req == 0) && (lle_hittime < lle->lle_hittime))
lle_hittime = lle->lle_hittime;
LLE_REQ_UNLOCK(lle);
+ struct lltable *llt = lle->lle_tbl;
+ nhops_hittime = nhops_get_hittime(llt->llt_ifp, llt->llt_af, lle);
+ /* 0 means "no hit yet"; otherwise the earliest of the two timestamps wins. */
+ if (lle_hittime == 0 || (nhops_hittime != 0 && nhops_hittime < lle_hittime))
+ lle_hittime = nhops_hittime;
return (lle_hittime);
}
@@ -643,6 +653,12 @@
}
lltable_unlink_entry(llt, lle);
+
+ /* Mark as invalid to invalidate the caches */
+ lle->r_flags &= ~RLLE_VALID;
+ lle->la_flags &= ~LLE_VALID;
+
+ nhops_update_neigh(ifp, llt->llt_af, lle);
IF_AFDATA_WUNLOCK(ifp);
llt->llt_delete_entry(llt, lle);
@@ -875,6 +891,7 @@
lltable_unlink_entry(llt, lle_tmp);
}
lltable_link_entry(llt, lle);
+ nhops_update_neigh(ifp, dst->sa_family, lle);
IF_AFDATA_WUNLOCK(ifp);
if (lle_tmp != NULL) {
Index: sys/net/route/nhop.h
===================================================================
--- sys/net/route/nhop.h
+++ sys/net/route/nhop.h
@@ -134,13 +134,10 @@
};
struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */
struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */
- struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
+ void *nh_prepend_raw;/* PTR+len for nexthop prepend */
counter_u64_t nh_pksent; /* packets sent using this nhop */
/* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */
- uint8_t nh_prepend_len; /* length of prepend data */
- uint8_t spare[3];
- uint32_t spare1; /* alignment */
- char nh_prepend[48]; /* L2 prepend */
+ struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
struct nhop_priv *nh_priv; /* control plane data */
/* -- 128 bytes -- */
};
@@ -163,6 +160,29 @@
_nh = NULL; \
} while (0)
+/*
+ * L2 prepend infrastructure definitions
+ * Nexthop L2 rewrites may change during nexthop lifetime when the neighbor
+ * changes its MAC. For the most common encapsulations - ethernet & IB,
+ * the maximum encap length is 24 (IPoIB) = LLE_MAX_LINKHDR.
+ */
+#define L2_PREPEND_LEN_BITS CACHE_LINE_SHIFT
+#define L2_PREPEND_LEN_MAX ((1 << L2_PREPEND_LEN_BITS) - sizeof(struct epoch_context))
+/* nh_prepend_raw encoding: buffer pointer with the prepend length kept in its low L2_PREPEND_LEN_BITS bits */
+#define _NH_L2_PREPEND_MASK_PTR(_p) ((uintptr_t)(_p) & ~((1 << L2_PREPEND_LEN_BITS) - 1))
+#define NH_L2_PREPEND_GET_PTR(_p) ((void *)_NH_L2_PREPEND_MASK_PTR(_p))
+#define NH_L2_PREPEND_GET_LEN(_p) ((uintptr_t)(_p) & ((1 << L2_PREPEND_LEN_BITS) - 1))
+/* Packs pointer @_p and length @_l; the low bits of @_p must be clear and @_l must fit - neither is checked here */
+#define NH_L2_COMPILE_PREPEND_PTR(_p, _l) ((void *)((uintptr_t)(_p) | (_l)))
+
+static inline void
+route_set_prepend_nh(struct route *ro, const struct nhop_object *nh)
+{
+ void *ptr = nh->nh_prepend_raw; /* read once: pointer and length below come from the same value */
+ ro->ro_prepend = (char *)NH_L2_PREPEND_GET_PTR(ptr); /* buffer address, length bits masked off */
+ ro->ro_plen = NH_L2_PREPEND_GET_LEN(ptr); /* 0 when no prepend is set (ptr == NULL) */
+}
+
struct weightened_nhop {
struct nhop_object *nh;
uint32_t weight;
@@ -180,6 +200,15 @@
struct vnet *nhop_get_vnet(const struct nhop_object *nh);
struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
+void *nhop_alloc_prepend(size_t size);
+void nhop_free_prepend(void *prepend);
+bool nhop_update_prepend(struct nhop_object *nh, void *prepend, size_t len);
+
+void nhops_update_neigh(struct ifnet *ifp, int family, const struct llentry *lle);
+void nhops_request_feedback(struct ifnet *ifp, int family, const struct llentry *lle);
+void nhops_stop_feedback(struct ifnet *ifp, int family, const struct llentry *lle);
+time_t nhops_get_hittime(struct ifnet *ifp, int family, const struct llentry *lle);
+
#endif /* _KERNEL */
/* Kernel <> userland structures */
Index: sys/net/route/nhop.c
===================================================================
--- sys/net/route/nhop.c
+++ sys/net/route/nhop.c
@@ -362,6 +362,12 @@
return (priv_ret);
}
+bool
+is_nhop_linked(struct nhop_priv *nh_priv)
+{
+ return (nh_priv->nh_idx != 0); /* presumably nh_idx is non-zero only between link_nhop() and unlink_nhop() - confirm */
+}
+
/*
* Searches for the nexthop by data specifcied in @nh_priv.
* Returns referenced nexthop or NULL.
Index: sys/net/route/nhop_ctl.c
===================================================================
--- sys/net/route/nhop_ctl.c
+++ sys/net/route/nhop_ctl.c
@@ -103,6 +103,27 @@
2 * CACHE_LINE_SIZE)
#define NHOP_PRIV_ALIGNED_SIZE roundup2(sizeof(struct nhop_priv), \
2 * CACHE_LINE_SIZE)
+
+static uma_zone_t nh_prepend_zone; /* Global zone for all nhop prepend data */
+
+struct nhop_prepend {
+ char prepend[L2_PREPEND_LEN_MAX]; /* L2 header bytes; first member, so the buffer and the struct share one address */
+ struct epoch_context epoch_ctx; /* lets epoch_call() defer freeing of a replaced buffer */
+};
+
+#define NHOP_PREPEND_ALIGNED_SIZE roundup2(sizeof(struct nhop_prepend), \
+ CACHE_LINE_SIZE)
+/*
+ * Nexthop L2 rewrites may change during nexthop lifetime when the neighbor
+ * changes its MAC. For the most common encapsulations - ethernet & IB,
+ * the maximum encap length is 24 (IPoIB) = LLE_MAX_LINKHDR.
+ *
+ */
+
+static bool nhop_update_prepend_locked(struct nhop_priv *nh_priv, void *prepend,
+ size_t len);
+
+
void
nhops_init(void)
{
@@ -110,6 +131,8 @@
nhops_zone = uma_zcreate("routing nhops",
NHOP_OBJECT_ALIGNED_SIZE + NHOP_PRIV_ALIGNED_SIZE,
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nh_prepend_zone = uma_zcreate("nhop prepend", NHOP_PREPEND_ALIGNED_SIZE,
+ NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}
/*
@@ -571,11 +594,16 @@
* and return.
*/
DPRINTF("link_nhop failed!");
+ if (nh_priv->nh_priv_flags & NHF_PRIV_NEIGH)
+ nhop_unlink_neighbor(nh_priv);
destroy_nhop(nh_priv);
return (ENOBUFS);
}
+ if (nhop_need_neigh(nh) && nhop_link_neigh(nh_priv))
+ nh_priv->nh_priv_flags |= NHF_PRIV_NEIGH;
+
return (0);
}
@@ -627,6 +655,12 @@
ifa_free(nh->nh_ifa);
counter_u64_free(nh->nh_pksent);
+ if (nh->nh_prepend_raw != NULL) {
+ struct nhop_prepend *np;
+ np = (struct nhop_prepend *)NH_L2_PREPEND_GET_PTR(nh->nh_prepend_raw);
+ nhop_free_prepend(np);
+ }
+
uma_zfree(nhops_zone, nh);
}
@@ -690,6 +724,10 @@
NET_EPOCH_ENTER(et);
if (refcount_release_if_not_last(&nh_priv->nh_linked)) {
+ /* Stop receiving updates for neighbor prepends */
+ if (nh_priv->nh_priv_flags & NHF_PRIV_NEIGH)
+ nhop_unlink_neighbor(nh_priv);
+
ctl = nh_priv->nh_control;
if (unlink_nhop(ctl, nh_priv) == NULL) {
/* Do not try to reclaim */
@@ -850,8 +888,13 @@
pnhe->nh_mtu = nh->nh_mtu;
pnhe->nh_flags = nh->nh_flags;
- memcpy(pnhe->nh_prepend, nh->nh_prepend, sizeof(nh->nh_prepend));
- pnhe->prepend_len = nh->nh_prepend_len;
+ if (nh->nh_prepend_raw != NULL) {
+ void *ptr = nh->nh_prepend_raw;
+ pnhe->prepend_len = NH_L2_PREPEND_GET_LEN(ptr);
+ memcpy(pnhe->nh_prepend,
+ NH_L2_PREPEND_GET_PTR(ptr), pnhe->prepend_len);
+ } else
+ pnhe->prepend_len = 0;
pnhe->nh_refcount = nh->nh_priv->nh_refcnt;
pnhe->nh_pksent = counter_u64_fetch(nh->nh_pksent);
@@ -923,3 +966,74 @@
return (0);
}
+
+/* Returns a zeroed prepend buffer; NULL if @size is too large or on ENOMEM. */
+void *
+nhop_alloc_prepend(size_t size)
+{
+	if (size <= L2_PREPEND_LEN_MAX)
+		return (uma_zalloc(nh_prepend_zone, M_NOWAIT | M_ZERO));
+	return (NULL);
+}
+
+void
+nhop_free_prepend(void *prepend)
+{
+ uma_zfree(nh_prepend_zone, prepend); /* frees right away; @prepend is the buffer start with the length bits masked off */
+}
+
+/* epoch_call() callback: releases the prepend buffer that embeds @ctx. */
+static void
+destroy_nhop_prepend_epoch(epoch_context_t ctx)
+{
+	struct nhop_prepend *np =
+	    __containerof(ctx, struct nhop_prepend, epoch_ctx);
+	nhop_free_prepend(np);
+}
+
+/*
+ * Replaces the L2 prepend of @nh_priv's nexthop; a NULL @prepend removes it.
+ * Returns true when the request was applied as given, in which case the
+ * nexthop owns @prepend. On false (nexthop not linked, or @prepend unusable
+ * and therefore handled as NULL) the buffer still belongs to the caller.
+ */
+static bool
+nhop_update_prepend_locked(struct nhop_priv *nh_priv, void *prepend, size_t len)
+{
+	void *ptr = NULL, *old_ptr;
+	bool usable = true;
+	/* The length is packed into the low pointer bits: both have to fit. */
+	if (prepend != NH_L2_PREPEND_GET_PTR(prepend) ||
+	    (prepend != NULL && len > L2_PREPEND_LEN_MAX))
+		usable = false;
+	else if (prepend != NULL)
+		ptr = NH_L2_COMPILE_PREPEND_PTR(prepend, len);
+	if (!is_nhop_linked(nh_priv))
+		return (false);
+
+	old_ptr = nh_priv->nh->nh_prepend_raw;
+	nh_priv->nh->nh_prepend_raw = ptr;
+	if (old_ptr != NULL) {
+		struct nhop_prepend *np = NH_L2_PREPEND_GET_PTR(old_ptr);
+		epoch_call(net_epoch_preempt, destroy_nhop_prepend_epoch,
+		    &np->epoch_ctx);
+	}
+	return (usable);
+}
+
+/*
+ * Locked wrapper around nhop_update_prepend_locked(): performs the update
+ * with the write lock of @nh's nexthop control block held.
+ */
+bool
+nhop_update_prepend(struct nhop_object *nh, void *prepend, size_t len)
+{
+	struct nh_control *ctl = nh->nh_priv->nh_control;
+	bool updated;
+
+	NHOPS_WLOCK(ctl);
+	updated = nhop_update_prepend_locked(nh->nh_priv, prepend, len);
+	NHOPS_WUNLOCK(ctl);
+	return (updated);
+}
+
Index: sys/net/route/nhop_neigh.c
===================================================================
--- /dev/null
+++ sys/net/route/nhop_neigh.c
@@ -0,0 +1,914 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+#include <net/route/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in_var.h>
+
+/*
+ * This file contains the logic binding nexthops ("nhop") of the route
+ * subsystem to the L2 neighbor (ARP/ND) entries used to reach their
+ * gateways.
+ *
+ * A nexthop that requires L2 resolution gets attached to a neighbor record
+ * ("nhop_neigh"). The record is identified by a combination of
+ * ifp, neighbor address family, upper (route) address family and
+ * the neighbor address.
+ *
+ * All neighbor records are stored in the per-VNET resizable hash table.
+ * The records serve two purposes:
+ * - propagating L2 prepend changes from the LLE code to the attached nexthops
+ * - reporting datapath usage of these nexthops back to the LLE code.
+ */
+
+#define DEBUG_MOD_NAME nhop_neigh
+#define DEBUG_MAX_LEVEL LOG_DEBUG
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG2);
+
+CHT_SLIST_DEFINE(nhop_neighs, struct nhop_neigh);
+/* produce hash value for an object */
+#define nhop_neighs_hash_obj(_obj) hash_neigh(_obj)
+/* compare two objects */
+#define nhop_neighs_cmp(_one, _two) cmp_neigh(_one, _two)
+/* next object accessor */
+#define nhop_neighs_next(_obj) (_obj)->nn_next
+
+
+struct nn_control {
+ struct nhop_neighs_head nn_head; /* hash table of neighbor records */
+ struct rmlock nn_lock; /* taken around nn_head and nn_feedback_list accesses */
+ struct callout nn_feedback_callout; /* runs pktsent_callout() while records await feedback */
+ TAILQ_HEAD(,nhop_neigh) nn_feedback_list; /* records with NN_FLAG_FB_LINKED set */
+};
+
+VNET_DEFINE_STATIC(struct nn_control, nn_control);
+#define V_nn_control VNET(nn_control)
+
+#define CTL_WLOCK(ctl) rm_wlock(&ctl->nn_lock)
+#define CTL_WUNLOCK(ctl) rm_wunlock(&ctl->nn_lock)
+#define CTL_TRACKER struct rm_priotracker tracker
+#define CTL_RLOCK(ctl) rm_rlock(&ctl->nn_lock, &tracker)
+#define CTL_RUNLOCK(ctl) rm_runlock(&ctl->nn_lock, &tracker)
+
+struct nhop_neigh {
+ struct ifnet *nn_ifp; /* key: egress interface */
+ uint8_t nn_neigh_family; /* key: family of the neighbor (gateway) address */
+ uint8_t nn_upper_family; /* key: family of the nexthops using the neighbor */
+ uint16_t nn_flags; /* NN_FLAG_*; NOTE(review): mutable, yet located below NEIGH_END_CMP */
+ union {
+ struct in_addr nn_addr4;
+ struct in6_addr nn_addr6;
+ }; /* key: neighbor address */
+ uint64_t nn_packets; /* packet count snapshot taken when feedback was requested */
+ time_t nn_hittime; /* time_uptime when traffic was noticed, 0 if none yet */
+ struct mtx nn_lock; /* initialized in init_neigh(), destroyed in free_neigh() */
+ struct nhop_neigh *nn_next; /* hash chain linkage */
+ TAILQ_HEAD(, nhop_priv) nn_nhops; /* nexthops attached to this neighbor */
+ TAILQ_ENTRY(nhop_neigh) nn_feedback_entry; /* nn_feedback_list linkage */
+};
+#define NEIGH_END_CMP (__offsetof(struct nhop_neigh, nn_packets))
+
+#define NN_FLAG_FB_LINKED 0x01 /* Linked to the feedback list */
+
+#define NN_LOCK_INIT(nn) mtx_init(&(nn)->nn_lock, "nhop_neigh lock", NULL, MTX_DEF)
+#define NN_LOCK_DESTROY(nn) mtx_destroy(&(nn)->nn_lock)
+#define NN_LOCK(nn) mtx_lock(&(nn)->nn_lock)
+#define NN_UNLOCK(nn) mtx_unlock(&(nn)->nn_lock)
+
+_Static_assert(L2_PREPEND_LEN_MAX >= LLE_MAX_LINKHDR,
+ "CACHE_LINE_SIZE has to be at least LLE_MAX_LINKHDR");
+
+static void free_neigh(struct nhop_neigh *nn);
+static void update_prepend_ptr(struct nhop_object *nh, const struct llentry *lle);
+static void schedule_callout(struct nn_control *ctl);
+
+char *nhop_print_buf(const struct nhop_object *nh, char *buf, size_t bufsize);
+char *lle_print_buf(const struct llentry *lle, struct ifnet *ifp, int family, char *buf, size_t bufsize);
+char *lle_print_buf_lltable(const struct llentry *lle, char *buf, size_t bufsize);
+char *neigh_print_buf(const struct nhop_neigh *nn, char *buf, size_t bufsize);
+const char *rib_print_family(int family);
+
+#if 0
+static char
+af_to_char(int family)
+{
+ switch (family) {
+ case AF_INET:
+ return '4';
+ case AF_INET6:
+ return '6';
+ case AF_LINK:
+ return '*';
+ }
+ return 'X';
+}
+#endif
+
+static int
+lle_get_upper_family(const struct llentry *lle, int family)
+{
+ return (lle->r_family == 0 ? family : lle->r_family); /* r_family of 0: fall back to the caller-supplied family */
+}
+
+/*
+ * Formats @nn as "nn/<upper family>/<ifname>/<address>" into @buf,
+ * e.g. nn/inet/em0/192.168.0.1. A NULL @nn gives "nn/NULL". Returns @buf.
+ */
+__noinline char *
+neigh_print_buf(const struct nhop_neigh *nn, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	const char *upper_str, *ifname;
+	int af;
+
+	if (nn == NULL) {
+		snprintf(buf, bufsize, "nn/NULL");
+		return (buf);
+	}
+
+	af = nn->nn_neigh_family;
+	upper_str = rib_print_family(nn->nn_upper_family);
+	ifname = if_name(nn->nn_ifp);
+	if (af == AF_INET || af == AF_INET6) {
+		const void *addr = (af == AF_INET) ?
+		    (const void *)&nn->nn_addr4 : (const void *)&nn->nn_addr6;
+		inet_ntop(af, addr, abuf, sizeof(abuf));
+		snprintf(buf, bufsize, "nn/%s/%s/%s", upper_str, ifname, abuf);
+	} else {
+		snprintf(buf, bufsize, "nn/%s/%s/unknown(%s)", upper_str,
+		    ifname, rib_print_family(af));
+	}
+	return (buf);
+}
+
+/*
+ * Formats @nh as "nh#<index>/<family>/<ifname>/<gateway>" into @buf,
+ * e.g. nh#33/inet/em0/192.168.0.1. Returns @buf.
+ */
+__noinline char *
+nhop_print_buf(const struct nhop_object *nh, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	const struct nhop_priv *nh_priv = nh->nh_priv;
+	const char *upper_str = rib_print_family(nh_priv->nh_family);
+	const char *ifname = if_name(nh->nh_ifp);
+	int gw_af = nh->gw_sa.sa_family;
+
+	if (gw_af == AF_INET || gw_af == AF_INET6) {
+		if (gw_af == AF_INET)
+			inet_ntop(AF_INET, &nh->gw4_sa.sin_addr, abuf,
+			    sizeof(abuf));
+		else
+			inet_ntop(AF_INET6, &nh->gw6_sa.sin6_addr, abuf,
+			    sizeof(abuf));
+		snprintf(buf, bufsize, "nh#%d/%s/%s/%s", nh_priv->nh_idx,
+		    upper_str, ifname, abuf);
+	} else if (gw_af == AF_LINK) {
+		snprintf(buf, bufsize, "nh#%d/%s/%s/resolve", nh_priv->nh_idx,
+		    upper_str, ifname);
+	} else {
+		snprintf(buf, bufsize, "nh#%d/%s/%s/????", nh_priv->nh_idx,
+		    upper_str, ifname);
+	}
+
+	return (buf);
+}
+
+/*
+ * Formats @lle as "lle/<upper family>/<valid|no_l2>/<ifname>/<address>"
+ * into @buf, using @family to interpret the L3 address. Returns @buf.
+ */
+__noinline char *
+lle_print_buf(const struct llentry *lle, struct ifnet *ifp, int family, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	const char *l2_state, *upper_str, *ifname = if_name(ifp);
+
+	l2_state = ((lle->r_flags & RLLE_VALID) != 0) ? "valid" : "no_l2";
+	upper_str = rib_print_family(lle_get_upper_family(lle, family));
+
+	if (family != AF_INET && family != AF_INET6) {
+		snprintf(buf, bufsize, "lle/%s/%s/%s/????", upper_str,
+		    l2_state, ifname);
+		return (buf);
+	}
+
+	if (family == AF_INET)
+		inet_ntop(AF_INET, &lle->r_l3addr.addr4, abuf, sizeof(abuf));
+	else
+		inet_ntop(AF_INET6, &lle->r_l3addr.addr6, abuf, sizeof(abuf));
+	snprintf(buf, bufsize, "lle/%s/%s/%s/%s", upper_str, l2_state, ifname,
+	    abuf);
+
+	return (buf);
+}
+
+/* Same as lle_print_buf(), taking ifp and family from @lle's own lltable. */
+__noinline char *
+lle_print_buf_lltable(const struct llentry *lle, char *buf, size_t bufsize)
+{
+	return (lle_print_buf(lle, lltable_get_ifp(lle->lle_tbl),
+	    lltable_get_af(lle->lle_tbl), buf, bufsize));
+}
+
+/* Returns a printable name of @family; anything but inet/inet6 is "unknown". */
+const char *
+rib_print_family(int family)
+{
+	const char *name = "unknown";
+
+	if (family == AF_INET)
+		name = "inet";
+	else if (family == AF_INET6)
+		name = "inet6";
+	return (name);
+}
+
+/*
+ * Sets up the per-VNET neighbor tracking state: a 16-bucket hash table
+ * (enough for most of the cases), its lock and the feedback list/callout.
+ */
+void
+vnet_nhops_init_neigh(void)
+{
+	struct nn_control *ctl = &V_nn_control;
+	const int num_buckets = 16;
+	void *buckets;
+
+	buckets = malloc(CHT_SLIST_GET_RESIZE_SIZE(num_buckets), M_NHOP,
+	    M_WAITOK | M_ZERO);
+	CHT_SLIST_INIT(&ctl->nn_head, buckets, num_buckets);
+	TAILQ_INIT(&ctl->nn_feedback_list);
+	rm_init(&ctl->nn_lock, "nexthop neigh lock");
+	callout_init(&ctl->nn_feedback_callout, 1);
+}
+
+/*
+ * Tears down the per-VNET neighbor tracking state.
+ * Called close to the end of the VNET teardown (from the end of
+ * rtables_destroy()): interfaces are already down (~IFF_UP), so no
+ * ARP/ND updates are expected and no routes should be left.
+ * XXX: static records triggered by ndp?
+ */
+void
+vnet_nhop_destroy_neigh(void)
+{
+	struct nn_control *ctl = &V_nn_control;
+	struct nhop_neigh *nn, *nn_tmp;
+
+	/* Make sure the feedback callout is neither pending nor running */
+	callout_drain(&ctl->nn_feedback_callout);
+
+	/* Release every remaining record, then the bucket array itself */
+	CHT_SLIST_FOREACH_SAFE(&ctl->nn_head, nhop_neighs, nn, nn_tmp) {
+		free_neigh(nn);
+	} CHT_SLIST_FOREACH_SAFE_END;
+	free(ctl->nn_head.ptr, M_NHOP);
+	ctl->nn_head.ptr = NULL;
+
+	rm_destroy(&ctl->nn_lock);
+}
+
+/*
+ * Neighbor hash calculation:
+ */
+struct _hash_data {
+ uint16_t ifentropy;
+ uint8_t neigh_family;
+ uint8_t upper_family;
+ uint32_t addr;
+};
+
+/*
+ * XOR flavour of the DJB hash: acc = (acc * 33) ^ byte over @len bytes of @h.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int acc = 0;
+	for (int pos = 0; pos < len; pos++)
+		acc = (acc * 33) ^ h[pos];
+	return (acc);
+}
+
+/* Hashes interface pointer bits, both families and 32 bits of the address. */
+static uint32_t
+hash_neigh(const struct nhop_neigh *nn)
+{
+	struct _hash_data key;
+	key.ifentropy = (uint16_t)((((uintptr_t)nn->nn_ifp) >> 6) & 0xFFFF);
+	key.neigh_family = nn->nn_neigh_family;
+	key.upper_family = nn->nn_upper_family;
+	key.addr = nn->nn_addr4.s_addr;
+	if (nn->nn_neigh_family == AF_INET6)
+		key.addr = nn->nn_addr6.s6_addr32[3];
+	return ((uint32_t)djb_hash((const unsigned char *)&key, sizeof(key)));
+}
+
+/* Key match on (ifp, families, address); nn_flags/padding do not take part. */
+static int
+cmp_neigh(const struct nhop_neigh *_one, const struct nhop_neigh *_two)
+{
+	return (_one->nn_ifp == _two->nn_ifp &&
+	    _one->nn_neigh_family == _two->nn_neigh_family &&
+	    _one->nn_upper_family == _two->nn_upper_family &&
+	    memcmp(&_one->nn_addr6, &_two->nn_addr6, sizeof(_one->nn_addr6)) == 0);
+}
+/*
+ * Looks up the neighbor record whose key matches the one set in @nn.
+ * Returns the record (no reference is taken) or NULL if there is none.
+ */
+static struct nhop_neigh *
+find_neigh(struct nn_control *ctl, const struct nhop_neigh *nn)
+{
+	struct nhop_neigh *found;
+
+	CHT_SLIST_FIND_BYOBJ(&ctl->nn_head, nhop_neighs, nn, found);
+	return (found);
+}
+
+/* Returns true if a record matching @nn_base exists; takes the read lock. */
+static bool
+has_neigh(struct nn_control *ctl, const struct nhop_neigh *nn_base)
+{
+	CTL_TRACKER;
+	struct nhop_neigh *nn;
+
+	CTL_RLOCK(ctl);
+	nn = find_neigh(ctl, nn_base);
+	CTL_RUNLOCK(ctl);
+	return (nn != NULL);
+}
+
+/*
+ * Tries to resize neighbor hash to the value specified by @new_num_buckets.
+ */
+static void
+resize_neigh_hash(struct nn_control *ctl, uint32_t new_num_buckets)
+{
+	void *nn_ptr;
+
+	nn_ptr = malloc(CHT_SLIST_GET_RESIZE_SIZE(new_num_buckets), M_NHOP,
+	    M_NOWAIT | M_ZERO);
+	if (nn_ptr == NULL) {
+		RT_LOG(LOG_NOTICE, "neigh hash resize to %u has failed", new_num_buckets);
+		return;
+	}
+
+	CTL_WLOCK(ctl);
+	RT_LOG(LOG_DEBUG, "going to resize neigh hash: %u -> %u",
+	    ctl->nn_head.hash_size, new_num_buckets);
+	CHT_SLIST_RESIZE(&ctl->nn_head, nhop_neighs, nn_ptr, new_num_buckets);
+	CTL_WUNLOCK(ctl);
+
+	free(nn_ptr, M_NHOP);	/* whatever the resize left in nn_ptr; free(9) accepts NULL */
+}
+
+/*
+ * Checks if nexthop @nh can be attached to the LLE/NDP neighbor.
+ * This is the case when the egress interface type uses L2 addressing
+ * (bridge, ethernet, infiniband, vlan) and the nexthop is either a
+ * gateway or a host route.
+ * Returns true if so.
+ */
+bool
+nhop_need_neigh(const struct nhop_object *nh)
+{
+
+	switch (nh->nh_ifp->if_type) {
+	case IFT_BRIDGE:
+	case IFT_ETHER:
+	case IFT_INFINIBAND:
+	case IFT_L2VLAN:
+		break;
+	default:
+		/* Not one of the L2 interface types handled here */
+		return (false);
+	}
+
+	if ((nh->nh_flags & (NHF_GATEWAY | NHF_HOST)) == 0)
+		return (false);
+
+	return (true);
+}
+
+/*
+ * Fills in the lookup key of @nn (interface, families, gateway address)
+ * from the nexthop specified by @nh_priv and initializes its nexthop list
+ * and lock. Address bytes are only written for AF_INET/AF_INET6 gateways.
+ */
+static void
+init_neigh(struct nhop_neigh *nn, const struct nhop_priv *nh_priv)
+{
+	const struct nhop_object *nh = nh_priv->nh;
+	const int gw_family = nh->gw_sa.sa_family;
+
+	nn->nn_ifp = nh->nh_ifp;
+	nn->nn_upper_family = nh_priv->nh_family;
+	nn->nn_neigh_family = gw_family;
+	if (gw_family == AF_INET)
+		nn->nn_addr4 = nh->gw4_sa.sin_addr;
+	else if (gw_family == AF_INET6)
+		nn->nn_addr6 = nh->gw6_sa.sin6_addr;
+
+	NN_LOCK_INIT(nn);
+	TAILQ_INIT(&nn->nn_nhops);
+}
+
+static void
+free_neigh(struct nhop_neigh *nn)
+{
+ NN_LOCK_DESTROY(nn); /* pairs with NN_LOCK_INIT() done by init_neigh() */
+ free(nn, M_NHOP); /* NOTE(review): immediate, not epoch-deferred - callers must have unlinked @nn */
+}
+
+/*
+ * Looks up the link-layer entry for the gateway of @nh_priv's nexthop in the
+ * lltable of its egress interface; if the nexthop family differs from the
+ * gateway family, llentry_lookup_family() picks the entry. NULL if none.
+ */
+static struct llentry *
+find_lle(struct nhop_priv *nh_priv)
+{
+	struct nhop_object *nh = nh_priv->nh;
+	const int gw_family = nh->gw_sa.sa_family;
+	struct lltable *llt = NULL;
+	struct llentry *lle = NULL;
+	void *afdata_ptr;
+
+	if (gw_family == AF_INET) {
+		afdata_ptr = nh->nh_ifp->if_afdata[AF_INET];
+		if (afdata_ptr != NULL)
+			llt = ((struct in_ifinfo *)afdata_ptr)->ii_llt;
+	} else if (gw_family == AF_INET6) {
+		afdata_ptr = nh->nh_ifp->if_afdata[AF_INET6];
+		if (afdata_ptr != NULL)
+			llt = ((struct in6_ifextra *)afdata_ptr)->lltable;
+	}
+	if (llt != NULL)
+		lle = lla_lookup(llt, LLE_UNLOCKED, &nh->gw_sa);
+	if (lle != NULL && nh_priv->nh_family != gw_family)
+		lle = llentry_lookup_family(lle, nh_priv->nh_family);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char nhbuf[48], lbuf[48];
+	FIB_NH_LOG(LOG_DEBUG, nh, "nhop %s: mapped to lle %s",
+	    nhop_print_buf(nh, nhbuf, sizeof(nhbuf)),
+	    lle ? lle_print_buf(lle, nh->nh_ifp, nh->gw_sa.sa_family, lbuf, sizeof(lbuf)) : "NULL");
+#endif
+	return (lle);
+}
+
+/*
+ * Links nexthop @nh_priv to the nexthop neighbor hash table and tries
+ * to fill in L2 nexthop prepend.
+ * Returns true on successful linkage.
+ */
+bool
+nhop_link_neigh(struct nhop_priv *nh_priv)
+{
+ uint32_t num_buckets_new;
+ struct nn_control *ctl = &V_nn_control;
+ struct nhop_neigh *nn = NULL, *nn_new;
+
+ NET_EPOCH_ASSERT();
+
+ /*
+ * Most llentries have at most one nexthop attached.
+ * Thus, assume we'll be inserting a new record.
+ */
+
+ nn_new = malloc(sizeof(struct nhop_neigh), M_NHOP, M_NOWAIT | M_ZERO);
+ if (nn_new == NULL)
+ return (false);
+ init_neigh(nn_new, nh_priv);
+
+ /* Try to calculate the prepend */
+ struct llentry *lle = find_lle(nh_priv);
+ if (lle != NULL)
+ update_prepend_ptr(nh_priv->nh, lle);
+
+ CTL_WLOCK(ctl);
+
+ /*
+ * Check if the hash needs to be resized.
+ * CHT_SLIST_GET_RESIZE_BUCKETS() returns either the new size or 0
+ * if resize is not required (the check at the end relies on that).
+ */
+ num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nn_head);
+
+ /* Check if already exists */
+ CHT_SLIST_FIND_BYOBJ(&ctl->nn_head, nhop_neighs, nn_new, nn);
+
+ if (nn == NULL) {
+ nn = nn_new;
+ nn_new = NULL;
+ CHT_SLIST_INSERT_HEAD(&ctl->nn_head, nhop_neighs, nn);
+
+ /*
+ * XXXME: the lle lookup above ran before this record became visible, so an lle change or deletion in between is missed
+ */
+ }
+ TAILQ_INSERT_TAIL(&nn->nn_nhops, nh_priv, nh_neigh_entry); /* attach to the new or the pre-existing record */
+
+ CTL_WUNLOCK(ctl);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+ char nhbuf[48], nnbuf[48];
+ FIB_NH_LOG(LOG_DEBUG, nh_priv->nh, "nhop %s linked to %s %s",
+ nhop_print_buf(nh_priv->nh, nhbuf, sizeof(nhbuf)),
+ nn_new == NULL ? "new" : "existing",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)));
+#endif
+
+ if (nn_new != NULL) /* an existing record was reused: drop the unused template */
+ free_neigh(nn_new);
+
+ if (num_buckets_new > 0) /* resize only after the write lock has been dropped */
+ resize_neigh_hash(ctl, num_buckets_new);
+
+ return (true);
+}
+
+/*
+ * Unlinks nexthop specified by @nh_priv data; frees the emptied neigh record.
+ */
+void
+nhop_unlink_neighbor(struct nhop_priv *nh_priv)
+{
+	struct nn_control *ctl = &V_nn_control;
+	uint32_t num_buckets_new;
+	struct nhop_neigh *nn, *nn_del = NULL, nn_base = {};
+
+	init_neigh(&nn_base, nh_priv);
+	CTL_WLOCK(ctl);
+	nn = find_neigh(ctl, &nn_base);
+	if (nn != NULL) {
+		TAILQ_REMOVE(&nn->nn_nhops, nh_priv, nh_neigh_entry);
+		if (TAILQ_EMPTY(&nn->nn_nhops)) {
+			/* The record is about to be freed: stop watching it */
+			if (nn->nn_flags & NN_FLAG_FB_LINKED) {
+				nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+				TAILQ_REMOVE(&ctl->nn_feedback_list, nn,
+				    nn_feedback_entry);
+			}
+			CHT_SLIST_REMOVE(&ctl->nn_head, nhop_neighs, nn, nn_del);
+		}
+	}
+	/* Check if hash needs to be resized */
+	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nn_head);
+	CTL_WUNLOCK(ctl);
+	NN_LOCK_DESTROY(&nn_base);	/* init_neigh() set up the template's mutex */
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char nhbuf[48], nnbuf[48];
+	FIB_NH_LOG(LOG_DEBUG, nh_priv->nh, "nhop %s unlinked from the neigh %s%s",
+	    nhop_print_buf(nh_priv->nh, nhbuf, sizeof(nhbuf)),
+	    neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+	    nn_del == NULL ? "" : " (last entry)");
+#endif
+	if (nn_del != NULL)
+		free_neigh(nn_del);
+	if (num_buckets_new > 0)
+		resize_neigh_hash(ctl, num_buckets_new);
+}
+
+/*
+ * Updates nhop @nh L2 prepend data with the pre-calculated prepend
+ * in @lle. If @lle contains no valid data, removes an existing L2 prepend.
+ */
+static void
+update_prepend_ptr(struct nhop_object *nh, const struct llentry *lle)
+{
+	void *prepend = NULL;
+	int prepend_len = 0;
+
+	if (lle->r_flags & RLLE_VALID) {
+		prepend_len = lle->r_hdrlen;
+		prepend = nhop_alloc_prepend(prepend_len);
+		if (prepend != NULL)
+			memcpy(prepend, lle->r_linkdata, prepend_len);
+	}
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char nhbuf[48], lbuf[48];
+	FIB_NH_LOG(LOG_DEBUG, nh, "nhop %s sync L2 from %s",
+	    nhop_print_buf(nh, nhbuf, sizeof(nhbuf)),
+	    lle_print_buf(lle, nh->nh_ifp, nh->gw_sa.sa_family, lbuf, sizeof(lbuf)));
+#endif
+	/* A rejected buffer stays ours (e.g. @nh got unlinked): do not leak it */
+	if (!nhop_update_prepend(nh, prepend, prepend_len) && prepend != NULL)
+		nhop_free_prepend(prepend);
+}
+
+/*
+ * Hook called by the LLE subsystem notifying of the changed L2 prepend
+ * for the @lle entry.
+ * Function searches the matching neigh entry and updates NH L2 prepend
+ * for all of the registered nexthops.
+ */
+void
+nhops_update_neigh(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ CTL_TRACKER;
+ struct nn_control *ctl = &V_nn_control;
+
+ NET_EPOCH_ASSERT();
+
+ if (VNET_IS_DYING(curvnet)) /* nothing to update while the VNET is going away */
+ return;
+
+ struct nhop_neigh nn_base = {
+ .nn_ifp = ifp,
+ .nn_upper_family = lle_get_upper_family(lle, family),
+ .nn_neigh_family = family,
+ .nn_addr6 = lle->r_l3addr.addr6, /* copied for any family; presumably overlays addr4 - confirm r_l3addr is a union */
+ };
+
+ bool matched = has_neigh(ctl, &nn_base); /* lookup done up front so the result can be logged */
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char lbuf[48];
+ RT_LOG(LOG_DEBUG2, "L2 prepend update from %s (matched: %s)",
+ lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf)),
+ matched ? "true" : "false");
+#endif
+
+ if (!matched)
+ return;
+
+ CTL_RLOCK(ctl); /* the record may have gone since has_neigh(): look it up again */
+ struct nhop_neigh *nn = find_neigh(ctl, &nn_base);
+ if (nn != NULL) {
+ struct nhop_priv *nh_priv;
+
+ TAILQ_FOREACH(nh_priv, &nn->nn_nhops, nh_neigh_entry)
+ update_prepend_ptr(nh_priv->nh, lle); /* each nexthop gets a buffer of its own */
+ }
+ CTL_RUNLOCK(ctl);
+}
+
+
+/*
+ * LLE validity.
+ * Both ARP and ND state machines require datapath-liveness checking
+ * as a step of expiring an lle entry. Additionally, ND state machine
+ * requires exact timestamp of the first packet traversing LLE after the
+ * liveness checking request, so it can execute check callouts less often
+ * (STALE -> DELAY -> PROBE).
+ *
+ * Thus, upon receiving the request to check dataplane liveness from LLE layers,
+ * the code below adds matching neigh entry to the feedback list and fires
+ * per-VNET callout on per-second basis, recording the first time when the
+ * packet is traversed.
+ *
+ * Neighs are removed from the list in 2 ways: the first is done by the callout
+ * upon recording the timestamp, the second is LLE code removing the matching
+ * LLE.
+ *
+ */
+
+/*
+ * Sums up the nh_pksent counters of all nexthops registered in @nn,
+ * i.e. the number of packets sent through this neighbor via nexthops.
+ */
+static uint64_t
+calc_pktsent(struct nhop_neigh *nn)
+{
+	struct nhop_priv *nh_priv;
+	uint64_t total = 0;
+
+	TAILQ_FOREACH(nh_priv, &nn->nn_nhops, nh_neigh_entry)
+		total += counter_u64_fetch(nh_priv->nh->nh_pksent);
+	return (total);
+}
+
+/*
+ * Callout that is called every second to check if the cumulative amount
+ * of packets traversing relevant neigh entries has changed. If the change
+ * is observed, record the change time and removes entry from the list.
+ *
+ * Note: removing nexthops from the neigh entry results in false positive.
+ * However, as the value is used to check if the underlying lle is still used,
+ * the worst that can happen, is that the entry will be kept slightly longer
+ * before the deletion.
+ */
+static void
+pktsent_callout(void *_arg)
+{
+ struct nn_control *ctl = (struct nn_control *)_arg;
+ struct nhop_neigh *nn, *nn_tmp;
+ bool empty;
+
+ CTL_WLOCK(ctl);
+
+ TAILQ_FOREACH_SAFE(nn, &ctl->nn_feedback_list, nn_feedback_entry, nn_tmp) {
+ if (nn->nn_packets != calc_pktsent(nn)) { /* differs from the snapshot taken at request time */
+ nn->nn_packets = 0;
+ nn->nn_hittime = time_uptime; /* callout time, i.e. up to a second after the packet */
+ nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+ TAILQ_REMOVE(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "L2 neigh %s got datapath feedback at %lu",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+ nn->nn_hittime);
+#endif
+ }
+ }
+ empty = TAILQ_EMPTY(&ctl->nn_feedback_list);
+ CTL_WUNLOCK(ctl);
+ if (!empty) /* keep polling while some record still awaits feedback */
+ schedule_callout(ctl);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ else
+ RT_LOG(LOG_DEBUG2, "datapath callout stopped");
+#endif
+}
+
+/* Arms the feedback callout to fire in one second unless already pending. */
+static void
+schedule_callout(struct nn_control *ctl)
+{
+	if (!callout_pending(&ctl->nn_feedback_callout))
+		callout_reset_sbt(&ctl->nn_feedback_callout, SBT_1S * 1, 0,
+		    pktsent_callout, ctl, 0);
+}
+
+static void
+update_feedback_membership(struct ifnet *ifp, int family, const struct llentry *lle,
+ bool add)
+{
+ struct nn_control *ctl = &V_nn_control;
+ struct nhop_neigh *nn;
+ bool need_callout = false;
+ /* Adds (@add) or removes (!@add) the neigh record matching @lle to/from the datapath feedback list. */
+ NET_EPOCH_ASSERT();
+
+ if (VNET_IS_DYING(curvnet))
+ return;
+
+ struct nhop_neigh nn_base = {
+ .nn_ifp = ifp,
+ .nn_upper_family = lle_get_upper_family(lle, family),
+ .nn_neigh_family = family,
+ .nn_addr6 = lle->r_l3addr.addr6,
+ };
+
+ /* Most of LLEs do not have mapped nhops, so fail early */
+ bool matched = has_neigh(ctl, &nn_base);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char lbuf[48];
+ lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf));
+ if (matched) {
+ RT_LOG(LOG_DEBUG2, "%s datapath feedback for %s", add ? "request" : "abort", lbuf);
+ } else {
+ RT_LOG(LOG_DEBUG3, "%s datapath feedback for %s (nomatch)", add ? "request" : "abort", lbuf);
+ }
+#endif
+
+ if (!matched)
+ return;
+
+ CTL_WLOCK(ctl); /* the record may have gone since has_neigh(): look it up again */
+ nn = find_neigh(ctl, &nn_base);
+ if (nn != NULL) {
+ if (add) {
+ nn->nn_packets = calc_pktsent(nn); /* baseline pktsent_callout() compares against */
+ nn->nn_hittime = 0; /* no traffic seen since this request */
+
+ if (!(nn->nn_flags & NN_FLAG_FB_LINKED)) {
+ nn->nn_flags |= NN_FLAG_FB_LINKED;
+ need_callout = TAILQ_EMPTY(&ctl->nn_feedback_list); /* first entry: callout has to be started */
+ TAILQ_INSERT_TAIL(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char llbuf[48], nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "added %s to datapath feedback for %s",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+ lle_print_buf(lle, ifp, family, llbuf, sizeof(llbuf)));
+#endif
+ }
+ } else {
+ /* Remove from the list */
+ if (nn->nn_flags & NN_FLAG_FB_LINKED) {
+ nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+ TAILQ_REMOVE(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char llbuf[48],
+ nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "removed %s from datapath feedback for %s",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+ lle_print_buf(lle, ifp, family, llbuf, sizeof(llbuf)));
+#endif
+ }
+ }
+ }
+ CTL_WUNLOCK(ctl);
+ if (need_callout) {
+ schedule_callout(ctl);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ RT_LOG(LOG_DEBUG2, "datapath callout started");
+#endif
+ }
+}
+
+void
+nhops_request_feedback(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ update_feedback_membership(ifp, family, lle, true); /* start watching the nexthops matching @lle for traffic */
+}
+
+void
+nhops_stop_feedback(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ update_feedback_membership(ifp, family, lle, false); /* drop the matching record from the feedback list */
+}
+
+/*
+ * Returns the time recorded by pktsent_callout() for the nexthops matching @lle
+ * after nhops_request_feedback() call; 0 if no traffic was noticed or nothing matches.
+ */
+time_t
+nhops_get_hittime(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ struct nn_control *ctl = &V_nn_control;
+ struct nhop_neigh *nn;
+ time_t hittime = 0;
+ CTL_TRACKER;
+
+ NET_EPOCH_ASSERT();
+
+ if (VNET_IS_DYING(curvnet))
+ return (0);
+
+ struct nhop_neigh nn_base = {
+ .nn_ifp = ifp,
+ .nn_upper_family = lle_get_upper_family(lle, family),
+ .nn_neigh_family = family,
+ .nn_addr6 = lle->r_l3addr.addr6,
+ };
+
+ CTL_RLOCK(ctl);
+ nn = find_neigh(ctl, &nn_base);
+ if (nn != NULL)
+ hittime = nn->nn_hittime; /* copied under the lock; @nn itself is not safe to use after unlock */
+ CTL_RUNLOCK(ctl);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ if (nn != NULL) { /* NOTE(review): dereferences @nn after the lock was dropped */
+ char lbuf[48], nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "%s datapath feedback returned %lu from %s",
+ lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf)),
+ hittime, neigh_print_buf(nn, nnbuf, sizeof(nnbuf)));
+ }
+#endif
+
+ return (hittime);
+}
Index: sys/net/route/nhop_utils.h
===================================================================
--- sys/net/route/nhop_utils.h
+++ sys/net/route/nhop_utils.h
@@ -139,6 +139,11 @@
for (_x = CHT_FIRST(_head, _i); _x; _x = _PX##_next(_x))
#define CHT_SLIST_FOREACH_END }
+#define CHT_SLIST_FOREACH_SAFE(_head, _PX, _x, _t) /* body may unlink _x */ \
+ for (uint32_t _i = 0; _i < (_head)->hash_size; _i++) { \
+ for (_x = CHT_FIRST(_head, _i); (_x) && ((_t) = _PX##_next(_x), 1); _x = _t)
+#define CHT_SLIST_FOREACH_SAFE_END } /* closes the per-bucket loop opened above */
+
#define CHT_SLIST_RESIZE(_head, _PX, _new_void_ptr, _new_hsize) \
uint32_t _new_idx; \
typeof((_head)->ptr) _new_ptr = (void *)_new_void_ptr; \
Index: sys/net/route/nhop_var.h
===================================================================
--- sys/net/route/nhop_var.h
+++ sys/net/route/nhop_var.h
@@ -80,12 +80,14 @@
uint32_t rt_flags; /* routing flags for the control plane */
/* nhop lookup comparison end */
uint32_t nh_idx; /* nexthop index */
+ uint32_t nh_priv_flags; /* non user-visible flags */
void *cb_func; /* function handling additional rewrite caps */
u_int nh_refcnt; /* number of references, refcount(9) */
u_int nh_linked; /* refcount(9), == 2 if linked to the list */
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the rnh */
struct nhop_priv *nh_next; /* hash table membership */
+ TAILQ_ENTRY(nhop_priv) nh_neigh_entry; /* neigh membership */
struct vnet *nh_vnet; /* vnet nhop belongs to */
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};
@@ -95,13 +97,22 @@
#define NH_IS_PINNED(_nh) ((!NH_IS_NHGRP(_nh)) && \
((_nh)->nh_priv->rt_flags & RTF_PINNED))
+#define NHF_PRIV_NEIGH 0x01 /* linked to a neighbor record */
+
/* nhop.c */
struct nhop_priv *find_nhop(struct nh_control *ctl,
const struct nhop_priv *nh_priv);
int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
+bool is_nhop_linked(struct nhop_priv *nh_priv);
/* nhop_ctl.c */
int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
+/* nhop_neigh.c */
+
+bool nhop_need_neigh(const struct nhop_object *nh);
+bool nhop_link_neigh(struct nhop_priv *nh_priv);
+void nhop_unlink_neighbor(struct nhop_priv *nh_priv);
+
#endif
Index: sys/net/route/route_ctl.c
===================================================================
--- sys/net/route/route_ctl.c
+++ sys/net/route/route_ctl.c
@@ -118,6 +118,9 @@
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define V_rtzone VNET(rtzone)
+/* Parent sysctl node; route_debug.h hangs each module's *_debug_level under it */
+SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
void
vnet_rtzone_init()
{
Index: sys/net/route/route_debug.h
===================================================================
--- /dev/null
+++ sys/net/route/route_debug.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 2021
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_ROUTE_DEBUG_H_
+#define _NET_ROUTE_DEBUG_H_
+
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+
+const char *rib_print_family(int family);
+
+static inline uint32_t
+nhop_get_fibnum(const struct nhop_object *nh)
+{
+ return (0); /* stub: @nh is ignored, so FIB_NH_LOG() always prints fib 0 */
+}
+
+/* DEBUG logic */
+#if defined(DEBUG_MOD_NAME) && defined(DEBUG_MAX_LEVEL)
+#define _DEBUG_PASS_MSG(_l) (OID_NAME >= (_l)) /* runtime level check */
+
+#define OID_NAME _OID_NAME(DEBUG_MOD_NAME)
+#define _OID_NAME(a) _OID_NAME_INDIRECT(a)
+#define _OID_NAME_INDIRECT(prefix) prefix##_debug_level
+
+#define SPREFIX _SPREFIX(DEBUG_MOD_NAME)
+#define _SPREFIX(a) __SPREFIX(a)
+#define __SPREFIX(a) #a
+
+/* OID_NAME goes through two macro levels so DEBUG_MOD_NAME expands before ## */
+#define _DECLARE_DEBUG(_default_level) \
+ SYSCTL_DECL(_net_route_debug); \
+ static int OID_NAME = _default_level; \
+ SYSCTL_INT(_net_route_debug, OID_AUTO, OID_NAME,\
+ CTLFLAG_RW | CTLFLAG_RWTUN, \
+ &(OID_NAME), 0, "debuglevel")
+
+#ifndef LOG_DEBUG2
+#define LOG_DEBUG2 8
+#endif
+#ifndef LOG_DEBUG3
+#define LOG_DEBUG3 9
+#endif
+
+#define _output printf
+
+#define _FIB_LOG(_l, _fib, _fam, _fmt, ...) do { if (_DEBUG_PASS_MSG(_l)) { \
+ _output("[" SPREFIX "] %s.%u %s: " _fmt "\n", rib_print_family(_fam), _fib, __func__, ##__VA_ARGS__); \
+} } while (0) /* one statement, so it is safe in an unbraced if/else */
+#define FIB_LOG(_l, _fib, _fam, _fmt, ...) FIB_LOG_##_l(_l, _fib, _fam, _fmt, ## __VA_ARGS__)
+/* FIB_LOG_<level> is either _FIB_LOG or an empty macro, picked at compile time. */
+#define FIB_NH_LOG(_l, _nh, _fmt, ...) FIB_LOG_##_l(_l, nhop_get_fibnum(_nh), (_nh)->gw_sa.sa_family, _fmt, ## __VA_ARGS__)
+
+#define _RT_LOG(_l, _fmt, ...) do { if (_DEBUG_PASS_MSG(_l)) { \
+ _output("[" SPREFIX "] %s: " _fmt "\n", __func__, ##__VA_ARGS__); \
+} } while (0) /* one statement, so it is safe in an unbraced if/else */
+#define RT_LOG(_l, _fmt, ...) RT_LOG_##_l(_l, _fmt, ## __VA_ARGS__)
+
+
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG3
+#define FIB_LOG_LOG_DEBUG3 _FIB_LOG
+#define RT_LOG_LOG_DEBUG3 _RT_LOG
+#else /* DEBUG3 messages are compiled out */
+#define FIB_LOG_LOG_DEBUG3(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG3(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL >= LOG_DEBUG3 */
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG2
+#define FIB_LOG_LOG_DEBUG2 _FIB_LOG
+#define RT_LOG_LOG_DEBUG2 _RT_LOG
+#else /* DEBUG2 messages are compiled out */
+#define FIB_LOG_LOG_DEBUG2(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG2(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL >= LOG_DEBUG2 */
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG
+#define FIB_LOG_LOG_DEBUG _FIB_LOG
+#define RT_LOG_LOG_DEBUG _RT_LOG
+#else /* DEBUG messages are compiled out */
+#define FIB_LOG_LOG_DEBUG(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL >= LOG_DEBUG */
+#if DEBUG_MAX_LEVEL>=LOG_INFO
+#define FIB_LOG_LOG_INFO _FIB_LOG
+#define RT_LOG_LOG_INFO _RT_LOG
+#else /* INFO messages are compiled out */
+#define FIB_LOG_LOG_INFO(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_INFO(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL >= LOG_INFO */
+#define FIB_LOG_LOG_NOTICE _FIB_LOG /* NOTICE, ERR and WARNING are always compiled in */
+#define FIB_LOG_LOG_ERR _FIB_LOG
+#define FIB_LOG_LOG_WARNING _FIB_LOG
+#define RT_LOG_LOG_NOTICE _RT_LOG
+#define RT_LOG_LOG_ERR _RT_LOG
+#define RT_LOG_LOG_WARNING _RT_LOG
+
+
+#endif
+
+#endif
\ No newline at end of file
Index: sys/net/route/route_helpers.c
===================================================================
--- sys/net/route/route_helpers.c
+++ sys/net/route/route_helpers.c
@@ -571,3 +571,4 @@
return (NULL);
}
#endif
+
Index: sys/net/route/route_tables.c
===================================================================
--- sys/net/route/route_tables.c
+++ sys/net/route/route_tables.c
@@ -262,6 +262,8 @@
#ifdef FIB_ALGO
vnet_fib_init();
#endif
+ vnet_nhops_init_neigh();
+
RTABLES_LOCK_INIT();
RTABLES_LOCK();
@@ -306,6 +308,7 @@
#ifdef FIB_ALGO
vnet_fib_destroy();
#endif
+ vnet_nhop_destroy_neigh();
}
VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
rtables_destroy, 0);
Index: sys/net/route/route_var.h
===================================================================
--- sys/net/route/route_var.h
+++ sys/net/route/route_var.h
@@ -247,6 +247,8 @@
void nhops_init(void);
int nhops_init_rib(struct rib_head *rh);
void nhops_destroy_rib(struct rib_head *rh);
+void vnet_nhops_init_neigh(void);
+void vnet_nhop_destroy_neigh(void);
void nhop_ref_object(struct nhop_object *nh);
int nhop_try_ref_object(struct nhop_object *nh);
void nhop_ref_any(struct nhop_object *nh);
Index: sys/net/vnet.h
===================================================================
--- sys/net/vnet.h
+++ sys/net/vnet.h
@@ -240,6 +240,8 @@
extern struct vnet *vnet0;
#define IS_DEFAULT_VNET(arg) ((arg) == vnet0)
+#define VNET_IS_DYING(_vnet) /* same test as if.c's former VNET_IS_SHUTTING_DOWN */ \
+ ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
#define CRED_TO_VNET(cr) (cr)->cr_prison->pr_vnet
#define TD_TO_VNET(td) CRED_TO_VNET((td)->td_ucred)
Index: sys/netinet/if_ether.c
===================================================================
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -1001,6 +1001,7 @@
if (la_tmp == NULL) {
arp_mark_lle_reachable(la);
LLE_WUNLOCK(la);
+ nhops_update_neigh(ifp, AF_INET, la);
} else {
/* Free newly-create entry and handle packet */
lltable_free_entry(LLTABLE(ifp), la);
@@ -1239,8 +1240,11 @@
lladdr_off) == 0)
return;
+ nhops_update_neigh(ifp, AF_INET, la);
+
/* Clear fast path feedback request if set */
llentry_mark_used(la);
+ nhops_stop_feedback(ifp, AF_INET, la);
}
arp_mark_lle_reachable(la);
Index: sys/netinet/ip_fastfwd.c
===================================================================
--- sys/netinet/ip_fastfwd.c
+++ sys/netinet/ip_fastfwd.c
@@ -433,6 +433,7 @@
ro.ro_flags |= RT_HAS_GW;
} else
gw = (const struct sockaddr *)dst;
+ route_set_prepend_nh(&ro, nh);
/*
* Handle redirect case.
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -301,6 +301,8 @@
ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0;
ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0;
ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
+
+ route_set_prepend_nh(ro, nh);
}
/*
Index: sys/netinet6/nd6.c
===================================================================
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -698,10 +698,10 @@
delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
break;
case ND6_LLINFO_REACHABLE:
- if (!ND6_LLINFO_PERMANENT(lle)) {
- ifp = lle->lle_tbl->llt_ifp;
+ ifp = lle->lle_tbl->llt_ifp;
+ if (!ND6_LLINFO_PERMANENT(lle))
delay = (long)ND_IFINFO(ifp)->reachable * hz;
- }
+ nhops_stop_feedback(ifp, AF_INET6, lle);
break;
case ND6_LLINFO_STALE:
@@ -1420,6 +1420,7 @@
/* Update data */
lltable_set_entry_addr(ifp, lle, buf, sz, off);
+ nhops_update_neigh(ifp, AF_INET6, lle);
struct llentry *child_lle;
CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
@@ -1429,6 +1430,7 @@
if (lltable_calc_llheader(ifp, fam, lladdr, buf, &sz, &off) == 0) {
/* success */
lltable_set_entry_addr(ifp, child_lle, buf, sz, off);
+ nhops_update_neigh(ifp, AF_INET6, child_lle);
child_lle->ln_state = ND6_LLINFO_REACHABLE;
}
LLE_WUNLOCK(child_lle);
@@ -2052,6 +2054,7 @@
if (ln_tmp == NULL) {
/* No existing lle, mark as new entry (6,7) */
is_newentry = 1;
+ nhops_update_neigh(ifp, AF_INET6, ln);
if (lladdr != NULL) { /* (7) */
nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
EVENTHANDLER_INVOKE(lle_event, ln,
Index: usr.bin/netstat/nhops.c
===================================================================
--- usr.bin/netstat/nhops.c
+++ usr.bin/netstat/nhops.c
@@ -312,8 +312,9 @@
xo_emit("{t:refcount/%*lu} ", wid_refcnt, nh->nh_refcount);
if (Wflag && nh->prepend_len) {
- char *prepend_hex = "AABBCCDDEE";
- xo_emit(" {:nhop-prepend/%*s}", wid_prepend, prepend_hex);
+ for (int i = 0; i < nh->prepend_len; i++)
+ snprintf(&buffer[i * 2], 3, "%02X", nh->nh_prepend[i]);
+ xo_emit(" {:nhop-prepend/%*s}", wid_prepend, buffer);
}
xo_emit("\n");

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 15, 9:10 PM (15 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14647993
Default Alt Text
D33658.id100573.diff (47 KB)

Event Timeline