Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102971136
D33658.id100573.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
47 KB
Referenced Files
None
Subscribers
None
D33658.id100573.diff
View Options
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4156,6 +4156,7 @@
net/route/nhgrp_ctl.c optional route_mpath
net/route/nhop.c standard
net/route/nhop_ctl.c standard
+net/route/nhop_neigh.c standard
net/route/nhop_utils.c standard
net/route/fib_algo.c optional fib_algo
net/route/route_ctl.c standard
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -337,11 +337,6 @@
SX_SYSINIT_FLAGS(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx",
SX_RECURSE);
-#ifdef VIMAGE
-#define VNET_IS_SHUTTING_DOWN(_vnet) \
- ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
-#endif
-
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];
@@ -1122,7 +1117,7 @@
#ifdef VIMAGE
bool shutdown;
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
#endif
/*
@@ -1367,7 +1362,7 @@
}
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
if (shutdown) {
CURVNET_RESTORE();
prison_free(pr);
@@ -1425,7 +1420,7 @@
}
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
+ shutdown = VNET_IS_DYING(ifp->if_vnet);
if (shutdown) {
CURVNET_RESTORE();
prison_free(pr);
@@ -2868,7 +2863,7 @@
CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
/* Make sure the VNET is stable. */
- shutdown = VNET_IS_SHUTTING_DOWN(so->so_vnet);
+ shutdown = VNET_IS_DYING(so->so_vnet);
if (shutdown) {
CURVNET_RESTORE();
return (EBUSY);
Index: sys/net/if_llatbl.c
===================================================================
--- sys/net/if_llatbl.c
+++ sys/net/if_llatbl.c
@@ -59,6 +59,7 @@
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
+#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
@@ -428,15 +429,19 @@
llentry_request_feedback(struct llentry *lle)
{
struct llentry *child_lle;
+ struct ifnet *ifp = lle->lle_tbl->llt_ifp;
+ int family = lle->lle_tbl->llt_af;
LLE_REQ_LOCK(lle);
lle->r_skip_req = 1;
LLE_REQ_UNLOCK(lle);
+ nhops_request_feedback(ifp, family, lle);
CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
LLE_REQ_LOCK(child_lle);
child_lle->r_skip_req = 1;
LLE_REQ_UNLOCK(child_lle);
+ nhops_request_feedback(ifp, family, child_lle);
}
}
@@ -462,13 +467,18 @@
+/*
+ * Returns the datapath hit timestamp for @lle, combining the value stored
+ * in the lle itself with the one reported by nhops_get_hittime().
+ * Returns 0 if neither source recorded a hit.
+ */
static time_t
llentry_get_hittime_raw(struct llentry *lle)
{
- time_t lle_hittime = 0;
+	struct lltable *llt = lle->lle_tbl;
+	time_t lle_hittime = 0, nhops_hittime;
LLE_REQ_LOCK(lle);
if ((lle->r_skip_req == 0) && (lle_hittime < lle->lle_hittime))
lle_hittime = lle->lle_hittime;
LLE_REQ_UNLOCK(lle);
+	/*
+	 * Zero means "no hit recorded", so it must never win the comparison:
+	 * use the nexthop timestamp when the lle itself has none, or when
+	 * the nexthop timestamp is the earlier of the two.
+	 */
+	nhops_hittime = nhops_get_hittime(llt->llt_ifp, llt->llt_af, lle);
+	if ((nhops_hittime != 0) &&
+	    ((lle_hittime == 0) || (nhops_hittime < lle_hittime)))
+		lle_hittime = nhops_hittime;
+
return (lle_hittime);
}
@@ -643,6 +653,12 @@
}
lltable_unlink_entry(llt, lle);
+
+ /* Mark as invalid to invalidate the caches */
+ lle->r_flags &= ~RLLE_VALID;
+ lle->la_flags &= ~LLE_VALID;
+
+ nhops_update_neigh(ifp, llt->llt_af, lle);
IF_AFDATA_WUNLOCK(ifp);
llt->llt_delete_entry(llt, lle);
@@ -875,6 +891,7 @@
lltable_unlink_entry(llt, lle_tmp);
}
lltable_link_entry(llt, lle);
+ nhops_update_neigh(ifp, dst->sa_family, lle);
IF_AFDATA_WUNLOCK(ifp);
if (lle_tmp != NULL) {
Index: sys/net/route/nhop.h
===================================================================
--- sys/net/route/nhop.h
+++ sys/net/route/nhop.h
@@ -134,13 +134,10 @@
};
struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */
struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */
- struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
+ void *nh_prepend_raw;/* PTR+len for nexthop prepend */
counter_u64_t nh_pksent; /* packets sent using this nhop */
/* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */
- uint8_t nh_prepend_len; /* length of prepend data */
- uint8_t spare[3];
- uint32_t spare1; /* alignment */
- char nh_prepend[48]; /* L2 prepend */
+ struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
struct nhop_priv *nh_priv; /* control plane data */
/* -- 128 bytes -- */
};
@@ -163,6 +160,29 @@
_nh = NULL; \
} while (0)
+/*
+ * L2 prepend infrastructure definitions
+ * Nexthop L2 rewrites may change during nexthop lifetime when the neighbor
+ * changes its MAC. For the most common encapsulations - ethernet & IB,
+ * the maximum encap length is 24 (IPoIB) = LLE_MAX_LINKHDR.
+ *
+ * The prepend buffer address and its length are packed into a single
+ * pointer-sized value: the low L2_PREPEND_LEN_BITS bits carry the length,
+ * the remaining bits carry the buffer address.
+ */
+/* Number of low pointer bits used to store the prepend length */
+#define L2_PREPEND_LEN_BITS CACHE_LINE_SHIFT
+/* (1 << L2_PREPEND_LEN_BITS) bytes minus the room taken by struct epoch_context */
+#define L2_PREPEND_LEN_MAX ((1 << L2_PREPEND_LEN_BITS) - sizeof(struct epoch_context))
+
+/* Clears the length bits of the packed value @_p */
+#define _NH_L2_PREPEND_MASK_PTR(_p) ((uintptr_t)(_p) & ~((1 << L2_PREPEND_LEN_BITS) - 1))
+/* Extracts the buffer address from the packed value @_p */
+#define NH_L2_PREPEND_GET_PTR(_p) ((void *)_NH_L2_PREPEND_MASK_PTR(_p))
+/* Extracts the prepend length from the packed value @_p */
+#define NH_L2_PREPEND_GET_LEN(_p) ((uintptr_t)(_p) & ((1 << L2_PREPEND_LEN_BITS) - 1))
+
+/* Packs buffer address @_p and length @_l; @_l is OR-ed in without masking */
+#define NH_L2_COMPILE_PREPEND_PTR(_p, _l) ((void *)((uintptr_t)(_p) | (_l)))
+
+/*
+ * Copies the L2 prepend of nexthop @nh into the route @ro.
+ * The packed value is read once and then split into the length
+ * (ro_plen) and the buffer address (ro_prepend).
+ */
+static inline void
+route_set_prepend_nh(struct route *ro, const struct nhop_object *nh)
+{
+	void *raw;
+
+	raw = nh->nh_prepend_raw;
+	ro->ro_plen = NH_L2_PREPEND_GET_LEN(raw);
+	ro->ro_prepend = (char *)NH_L2_PREPEND_GET_PTR(raw);
+}
+
struct weightened_nhop {
struct nhop_object *nh;
uint32_t weight;
@@ -180,6 +200,15 @@
struct vnet *nhop_get_vnet(const struct nhop_object *nh);
struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
+void *nhop_alloc_prepend(size_t size);
+void nhop_free_prepend(void *prepend);
+bool nhop_update_prepend(struct nhop_object *nh, void *prepend, size_t len);
+
+void nhops_update_neigh(struct ifnet *ifp, int family, const struct llentry *lle);
+void nhops_request_feedback(struct ifnet *ifp, int family, const struct llentry *lle);
+void nhops_stop_feedback(struct ifnet *ifp, int family, const struct llentry *lle);
+time_t nhops_get_hittime(struct ifnet *ifp, int family, const struct llentry *lle);
+
#endif /* _KERNEL */
/* Kernel <> userland structures */
Index: sys/net/route/nhop.c
===================================================================
--- sys/net/route/nhop.c
+++ sys/net/route/nhop.c
@@ -362,6 +362,12 @@
return (priv_ret);
}
+/*
+ * Returns true if @nh_priv has a non-zero nexthop index (nh_idx).
+ */
+bool
+is_nhop_linked(struct nhop_priv *nh_priv)
+{
+ return (nh_priv->nh_idx != 0);
+}
+
/*
* Searches for the nexthop by data specifcied in @nh_priv.
* Returns referenced nexthop or NULL.
Index: sys/net/route/nhop_ctl.c
===================================================================
--- sys/net/route/nhop_ctl.c
+++ sys/net/route/nhop_ctl.c
@@ -103,6 +103,27 @@
2 * CACHE_LINE_SIZE)
#define NHOP_PRIV_ALIGNED_SIZE roundup2(sizeof(struct nhop_priv), \
2 * CACHE_LINE_SIZE)
+
+static uma_zone_t nh_prepend_zone; /* Global zone for all nhop prepend data */
+
+/*
+ * Storage for a single L2 prepend: the prepend bytes followed by the
+ * epoch context used by destroy_nhop_prepend_epoch() to free the buffer.
+ */
+struct nhop_prepend {
+ char prepend[L2_PREPEND_LEN_MAX];
+ struct epoch_context epoch_ctx;
+};
+
+/* Item size of nh_prepend_zone: struct nhop_prepend rounded up to a cache line */
+#define NHOP_PREPEND_ALIGNED_SIZE roundup2(sizeof(struct nhop_prepend), \
+ CACHE_LINE_SIZE)
+/*
+ * Nexthop L2 rewrites may change during nexthop lifetime when the neighbor
+ * changes its MAC. For the most common encapsulations - ethernet & IB,
+ * the maximum encap length is 24 (IPoIB) = LLE_MAX_LINKHDR.
+ *
+ */
+
+static bool nhop_update_prepend_locked(struct nhop_priv *nh_priv, void *prepend,
+ size_t len);
+
+
void
nhops_init(void)
{
@@ -110,6 +131,8 @@
nhops_zone = uma_zcreate("routing nhops",
NHOP_OBJECT_ALIGNED_SIZE + NHOP_PRIV_ALIGNED_SIZE,
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ nh_prepend_zone = uma_zcreate("nhop prepend", NHOP_PREPEND_ALIGNED_SIZE,
+ NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
}
/*
@@ -571,11 +594,16 @@
* and return.
*/
DPRINTF("link_nhop failed!");
+ if (nh_priv->nh_priv_flags & NHF_PRIV_NEIGH)
+ nhop_unlink_neighbor(nh_priv);
destroy_nhop(nh_priv);
return (ENOBUFS);
}
+ if (nhop_need_neigh(nh) && nhop_link_neigh(nh_priv))
+ nh_priv->nh_priv_flags |= NHF_PRIV_NEIGH;
+
return (0);
}
@@ -627,6 +655,12 @@
ifa_free(nh->nh_ifa);
counter_u64_free(nh->nh_pksent);
+ if (nh->nh_prepend_raw != NULL) {
+ struct nhop_prepend *np;
+ np = (struct nhop_prepend *)NH_L2_PREPEND_GET_PTR(nh->nh_prepend_raw);
+ nhop_free_prepend(np);
+ }
+
uma_zfree(nhops_zone, nh);
}
@@ -690,6 +724,10 @@
NET_EPOCH_ENTER(et);
if (refcount_release_if_not_last(&nh_priv->nh_linked)) {
+ /* Stop receiving updates for neighbor prepends */
+ if (nh_priv->nh_priv_flags & NHF_PRIV_NEIGH)
+ nhop_unlink_neighbor(nh_priv);
+
ctl = nh_priv->nh_control;
if (unlink_nhop(ctl, nh_priv) == NULL) {
/* Do not try to reclaim */
@@ -850,8 +888,13 @@
pnhe->nh_mtu = nh->nh_mtu;
pnhe->nh_flags = nh->nh_flags;
- memcpy(pnhe->nh_prepend, nh->nh_prepend, sizeof(nh->nh_prepend));
- pnhe->prepend_len = nh->nh_prepend_len;
+ if (nh->nh_prepend_raw != NULL) {
+ void *ptr = nh->nh_prepend_raw;
+ pnhe->prepend_len = NH_L2_PREPEND_GET_LEN(ptr);
+ memcpy(pnhe->nh_prepend,
+ NH_L2_PREPEND_GET_PTR(ptr), pnhe->prepend_len);
+ } else
+ pnhe->prepend_len = 0;
pnhe->nh_refcount = nh->nh_priv->nh_refcnt;
pnhe->nh_pksent = counter_u64_fetch(nh->nh_pksent);
@@ -923,3 +966,74 @@
return (0);
}
+
+/*
+ * Allocates a zeroed prepend buffer able to hold @size bytes.
+ * Returns NULL if @size exceeds L2_PREPEND_LEN_MAX or if the
+ * non-sleeping allocation fails.
+ */
+void *
+nhop_alloc_prepend(size_t size)
+{
+	if (size <= L2_PREPEND_LEN_MAX)
+		return (uma_zalloc(nh_prepend_zone, M_NOWAIT | M_ZERO));
+	return (NULL);
+}
+
+/*
+ * Returns the prepend buffer @prepend to nh_prepend_zone.
+ * @prepend is the buffer address itself, without length bits.
+ */
+void
+nhop_free_prepend(void *prepend)
+{
+ uma_zfree(nh_prepend_zone, prepend);
+}
+
+/*
+ * Epoch callback: frees the struct nhop_prepend that embeds @ctx.
+ */
+static void
+destroy_nhop_prepend_epoch(epoch_context_t ctx)
+{
+
+	nhop_free_prepend(__containerof(ctx, struct nhop_prepend, epoch_ctx));
+}
+
+/*
+ * Installs @prepend of length @len as the L2 prepend of the nexthop
+ * @nh_priv, replacing the previous one. Passing NULL removes the prepend.
+ * The only caller in this file holds NHOPS_WLOCK() around the call.
+ *
+ * The length is stored in the low L2_PREPEND_LEN_BITS bits of the pointer,
+ * so @prepend must have those bits clear and @len must not exceed
+ * L2_PREPEND_LEN_MAX. A buffer violating either constraint is ignored
+ * (the prepend is cleared instead) and is not freed here.
+ *
+ * The previously installed buffer, if any, is released via epoch_call()
+ * rather than immediately.
+ *
+ * Returns true if the nexthop is linked and its prepend was updated,
+ * false (leaving the nexthop untouched) otherwise.
+ */
+static bool
+nhop_update_prepend_locked(struct nhop_priv *nh_priv, void *prepend, size_t len)
+{
+	void *ptr = NULL, *old_ptr;
+
+	/*
+	 * A misaligned buffer or an oversized length would spill into the
+	 * bits reserved for the other half of the packed value.
+	 */
+	if (prepend != NH_L2_PREPEND_GET_PTR(prepend) ||
+	    len > L2_PREPEND_LEN_MAX)
+		prepend = NULL;
+	if (prepend != NULL)
+		ptr = NH_L2_COMPILE_PREPEND_PTR(prepend, len);
+
+	if (!is_nhop_linked(nh_priv))
+		return (false);
+
+	/* NOTE(review): plain store; confirm readers need no release barrier */
+	old_ptr = nh_priv->nh->nh_prepend_raw;
+	nh_priv->nh->nh_prepend_raw = ptr;
+
+	if (old_ptr != NULL) {
+		struct nhop_prepend *np = NH_L2_PREPEND_GET_PTR(old_ptr);
+
+		epoch_call(net_epoch_preempt, destroy_nhop_prepend_epoch,
+		    &np->epoch_ctx);
+	}
+
+	return (true);
+}
+
+/*
+ * Locked wrapper: updates the L2 prepend of @nh with @prepend/@len while
+ * holding the write lock of the nexthop's control structure.
+ * Returns the result of nhop_update_prepend_locked().
+ */
+bool
+nhop_update_prepend(struct nhop_object *nh, void *prepend, size_t len)
+{
+	struct nhop_priv *priv = nh->nh_priv;
+	struct nh_control *ctl = priv->nh_control;
+	bool updated;
+
+	NHOPS_WLOCK(ctl);
+	updated = nhop_update_prepend_locked(priv, prepend, len);
+	NHOPS_WUNLOCK(ctl);
+
+	return (updated);
+}
+
Index: sys/net/route/nhop_neigh.c
===================================================================
--- /dev/null
+++ sys/net/route/nhop_neigh.c
@@ -0,0 +1,914 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+#include <net/route/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/vnet.h>
+
+#include <netinet/in_var.h>
+
+/*
+ * This file contains data structures management logic for the nexthop ("nhop")
+ * route subsystem.
+ *
+ * Nexthops in the original sense are the objects containing all the necessary
+ * information to forward the packet to the selected destination.
+ * In particular, nexthop is defined by a combination of
+ * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
+ * NHF_DEFAULT
+ *
+ * All nexthops are stored in the resizable hash table.
+ * Additionally, each nexthop gets assigned its unique index (nexthop index)
+ * so userland programs can interact with the nexthops easier. Index allocation
+ * is backed by the bitmask array.
+ */
+
+#define DEBUG_MOD_NAME nhop_neigh
+#define DEBUG_MAX_LEVEL LOG_DEBUG
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG2);
+
+CHT_SLIST_DEFINE(nhop_neighs, struct nhop_neigh);
+/* produce hash value for an object */
+#define nhop_neighs_hash_obj(_obj) hash_neigh(_obj)
+/* compare two objects */
+#define nhop_neighs_cmp(_one, _two) cmp_neigh(_one, _two)
+/* next object accessor */
+#define nhop_neighs_next(_obj) (_obj)->nn_next
+
+
+/*
+ * Neighbor tracking state; one instance is defined per VNET below.
+ */
+struct nn_control {
+ struct nhop_neighs_head nn_head; /* hash table of nhop_neigh entries */
+ struct rmlock nn_lock; /* taken via CTL_[RW]LOCK() around hash/list access */
+ struct callout nn_feedback_callout; /* runs pktsent_callout() */
+ TAILQ_HEAD(,nhop_neigh) nn_feedback_list; /* neighs waiting for datapath feedback */
+};
+
+VNET_DEFINE_STATIC(struct nn_control, nn_control);
+#define V_nn_control VNET(nn_control)
+
+#define CTL_WLOCK(ctl) rm_wlock(&ctl->nn_lock)
+#define CTL_WUNLOCK(ctl) rm_wunlock(&ctl->nn_lock)
+#define CTL_TRACKER struct rm_priotracker tracker
+#define CTL_RLOCK(ctl) rm_rlock(&ctl->nn_lock, &tracker)
+#define CTL_RUNLOCK(ctl) rm_runlock(&ctl->nn_lock, &tracker)
+
+/*
+ * Neighbor entry: groups all nexthops that share the same interface,
+ * address families and gateway address.
+ */
+struct nhop_neigh {
+ struct ifnet *nn_ifp; /* key: interface */
+ uint8_t nn_neigh_family; /* key: family of the gateway address */
+ uint8_t nn_upper_family; /* key: family of the nexthop itself */
+ uint16_t nn_flags; /* NN_FLAG_* */
+ union {
+ struct in_addr nn_addr4; /* key: gateway address (AF_INET) */
+ struct in6_addr nn_addr6; /* key: gateway address (AF_INET6) */
+ };
+ uint64_t nn_packets; /* packet count snapshot taken by calc_pktsent() */
+ time_t nn_hittime; /* time_uptime of the detected traffic, 0 if none */
+ struct mtx nn_lock;
+ struct nhop_neigh *nn_next; /* hash chain linkage */
+ TAILQ_HEAD(, nhop_priv) nn_nhops; /* nexthops attached to this neigh */
+ TAILQ_ENTRY(nhop_neigh) nn_feedback_entry; /* nn_feedback_list linkage */
+};
+/*
+ * Offset of the first field following the key fields. Note that nn_flags
+ * and any structure padding also sit below this offset.
+ */
+#define NEIGH_END_CMP (__offsetof(struct nhop_neigh, nn_packets))
+
+#define NN_FLAG_FB_LINKED 0x01 /* Linked to the feedback list */
+
+#define NN_LOCK_INIT(nn) mtx_init(&(nn)->nn_lock, "nhop_neigh lock", NULL, MTX_DEF)
+#define NN_LOCK_DESTROY(nn) mtx_destroy(&(nn)->nn_lock)
+#define NN_LOCK(nn) mtx_lock(&(nn)->nn_lock)
+#define NN_UNLOCK(nn) mtx_unlock(&(nn)->nn_lock)
+
+_Static_assert(L2_PREPEND_LEN_MAX >= LLE_MAX_LINKHDR,
+ "CACHE_LINE_SIZE has to be at least LLE_MAX_LINKHDR");
+
+static void free_neigh(struct nhop_neigh *nn);
+static void update_prepend_ptr(struct nhop_object *nh, const struct llentry *lle);
+static void schedule_callout(struct nn_control *ctl);
+
+char *nhop_print_buf(const struct nhop_object *nh, char *buf, size_t bufsize);
+char *lle_print_buf(const struct llentry *lle, struct ifnet *ifp, int family, char *buf, size_t bufsize);
+char *lle_print_buf_lltable(const struct llentry *lle, char *buf, size_t bufsize);
+char *neigh_print_buf(const struct nhop_neigh *nn, char *buf, size_t bufsize);
+const char *rib_print_family(int family);
+
+#if 0
+static char
+af_to_char(int family)
+{
+ switch (family) {
+ case AF_INET:
+ return '4';
+ case AF_INET6:
+ return '6';
+ case AF_LINK:
+ return '*';
+ }
+ return 'X';
+}
+#endif
+
+/*
+ * Returns the upper-layer family of @lle: its r_family when set,
+ * otherwise the supplied @family.
+ */
+static int
+lle_get_upper_family(const struct llentry *lle, int family)
+{
+	if (lle->r_family != 0)
+		return (lle->r_family);
+	return (family);
+}
+
+/*
+ * Formats a human-readable description of @nn into @buf (at most @bufsize
+ * bytes), e.g. "nn/inet/em0/192.168.0.1". A NULL @nn yields "nn/NULL".
+ * Returns @buf.
+ */
+__noinline char *
+neigh_print_buf(const struct nhop_neigh *nn, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	const void *addr = NULL;
+
+	if (nn == NULL) {
+		snprintf(buf, bufsize, "nn/NULL");
+		return (buf);
+	}
+
+	/* Pick the address member matching the neighbor family, if known */
+	if (nn->nn_neigh_family == AF_INET6)
+		addr = &nn->nn_addr6;
+	else if (nn->nn_neigh_family == AF_INET)
+		addr = &nn->nn_addr4;
+
+	if (addr != NULL) {
+		inet_ntop(nn->nn_neigh_family, addr, abuf, sizeof(abuf));
+		snprintf(buf, bufsize, "nn/%s/%s/%s",
+		    rib_print_family(nn->nn_upper_family), if_name(nn->nn_ifp), abuf);
+	} else {
+		snprintf(buf, bufsize, "nn/%s/%s/unknown(%s)",
+		    rib_print_family(nn->nn_upper_family), if_name(nn->nn_ifp),
+		    rib_print_family(nn->nn_neigh_family));
+	}
+
+	return (buf);
+}
+
+/*
+ * Formats a human-readable description of nexthop @nh into @buf (at most
+ * @bufsize bytes), e.g. "nh#33/inet/em0/192.168.0.1".
+ * Returns @buf.
+ */
+__noinline char *
+nhop_print_buf(const struct nhop_object *nh, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	struct nhop_priv *priv = nh->nh_priv;
+	const char *upper = rib_print_family(priv->nh_family);
+	int gw_family = nh->gw_sa.sa_family;
+
+	if (gw_family == AF_INET6 || gw_family == AF_INET) {
+		/* Gateway address present: print it in textual form */
+		if (gw_family == AF_INET6)
+			inet_ntop(AF_INET6, &nh->gw6_sa.sin6_addr, abuf, sizeof(abuf));
+		else
+			inet_ntop(AF_INET, &nh->gw4_sa.sin_addr, abuf, sizeof(abuf));
+		snprintf(buf, bufsize, "nh#%d/%s/%s/%s", priv->nh_idx, upper,
+		    if_name(nh->nh_ifp), abuf);
+	} else if (gw_family == AF_LINK) {
+		snprintf(buf, bufsize, "nh#%d/%s/%s/resolve", priv->nh_idx, upper,
+		    if_name(nh->nh_ifp));
+	} else {
+		snprintf(buf, bufsize, "nh#%d/%s/%s/????", priv->nh_idx, upper,
+		    if_name(nh->nh_ifp));
+	}
+
+	return (buf);
+}
+
+/*
+ * Formats a human-readable description of @lle on interface @ifp into @buf
+ * (at most @bufsize bytes): upper family, whether RLLE_VALID is set
+ * ("valid"/"no_l2"), interface name and the L3 address for @family.
+ * Returns @buf.
+ */
+__noinline char *
+lle_print_buf(const struct llentry *lle, struct ifnet *ifp, int family, char *buf, size_t bufsize)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	const char *valid, *upper_str;
+
+	if (lle->r_flags & RLLE_VALID)
+		valid = "valid";
+	else
+		valid = "no_l2";
+	upper_str = rib_print_family(lle_get_upper_family(lle, family));
+
+	if (family == AF_INET || family == AF_INET6) {
+		if (family == AF_INET)
+			inet_ntop(AF_INET, &lle->r_l3addr.addr4, abuf, sizeof(abuf));
+		else
+			inet_ntop(AF_INET6, &lle->r_l3addr.addr6, abuf, sizeof(abuf));
+		snprintf(buf, bufsize, "lle/%s/%s/%s/%s", upper_str,
+		    valid, if_name(ifp), abuf);
+	} else {
+		snprintf(buf, bufsize, "lle/%s/%s/%s/????", upper_str,
+		    valid, if_name(ifp));
+	}
+
+	return (buf);
+}
+
+/*
+ * Same as lle_print_buf(), taking the interface and address family from
+ * the lltable that @lle belongs to.
+ */
+__noinline char *
+lle_print_buf_lltable(const struct llentry *lle, char *buf, size_t bufsize)
+{
+ struct lltable *tbl = lle->lle_tbl;
+
+ return (lle_print_buf(lle, lltable_get_ifp(tbl), lltable_get_af(tbl), buf, bufsize));
+}
+
+/*
+ * Returns a static string naming @family: "inet", "inet6" or "unknown".
+ */
+const char *
+rib_print_family(int family)
+{
+
+	switch (family) {
+	case AF_INET:
+		return ("inet");
+	case AF_INET6:
+		return ("inet6");
+	default:
+		return ("unknown");
+	}
+}
+
+
+/*
+ * Initializes the neighbor tracking state of the current VNET:
+ * hash table, lock, feedback list and feedback callout.
+ */
+void
+vnet_nhops_init_neigh(void)
+{
+ struct nn_control *ctl = &V_nn_control;
+ /*
+ * Allocate nexthop hash. Start with 16 items by default (128 bytes).
+ * This will be enough for most of the cases.
+ */
+ int num_buckets = 16;
+ size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
+
+ /* M_WAITOK: the result is used without a NULL check */
+ void *ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
+ CHT_SLIST_INIT(&ctl->nn_head, ptr, num_buckets);
+ rm_init(&ctl->nn_lock, "nexthop neigh lock");
+ TAILQ_INIT(&ctl->nn_feedback_list);
+
+ /* NOTE(review): second argument presumably marks the callout MP-safe */
+ callout_init(&ctl->nn_feedback_callout, 1);
+}
+
+/*
+ * Tears down the neighbor tracking state of the current VNET: stops the
+ * feedback callout, frees every remaining neigh entry, releases the hash
+ * bucket array and destroys the lock.
+ */
+void
+vnet_nhop_destroy_neigh(void)
+{
+	struct nn_control *ctl = &V_nn_control;
+	struct nhop_neigh *nn, *nn_tmp;
+
+	callout_drain(&ctl->nn_feedback_callout);
+
+	/*
+	 *
+	 * Close to the end.
+	 * All relevant interfaces are set to if_down (~IFF_UP) so
+	 * there shouldn't be any incoming traffic that can trigger
+	 * ARP/ND updates.
+	 * XXX: static records triggered by ndp?
+	 * Should not be any routes - as it's called in the end of
+	 * rtables_destroy()
+	 */
+
+	CHT_SLIST_FOREACH_SAFE(&ctl->nn_head, nhop_neighs, nn, nn_tmp) {
+		free_neigh(nn);
+	} CHT_SLIST_FOREACH_END;
+
+	/* Release the bucket array allocated in vnet_nhops_init_neigh() */
+	free(ctl->nn_head.ptr, M_NHOP);
+
+	rm_destroy(&ctl->nn_lock);
+}
+
+/*
+ * Nexthop neighbor hash calculation:
+ * key material assembled by hash_neigh() and fed to djb_hash().
+ */
+struct _hash_data {
+ uint16_t ifentropy;
+ uint8_t neigh_family;
+ uint8_t upper_family;
+ uint32_t addr;
+};
+
+/*
+ * Hashes @len bytes starting at @h: for every byte the accumulator is
+ * multiplied by 33 and XOR-ed with the byte. Returns 0 for @len <= 0.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int acc = 0;
+	int pos = 0;
+
+	while (pos < len) {
+		acc = (33 * acc) ^ h[pos];
+		pos++;
+	}
+
+	return (acc);
+}
+
+/*
+ * Computes the hash of the neigh entry @nn from its interface pointer,
+ * both address families and 32 bits of the gateway address.
+ * nn_flags does not take part in the hash.
+ */
+static uint32_t
+hash_neigh(const struct nhop_neigh *nn)
+{
+ struct _hash_data key = {
+ /* bits 6..21 of the ifnet pointer value */
+ .ifentropy = (uint16_t)((((uintptr_t)nn->nn_ifp) >> 6) & 0xFFFF),
+ .neigh_family = nn->nn_neigh_family,
+ .upper_family = nn->nn_upper_family,
+ /* last 32 bits of an IPv6 address; otherwise the nn_addr4 member */
+ .addr = (nn->nn_neigh_family == AF_INET6) ?
+ nn->nn_addr6.s6_addr32[3] : nn->nn_addr4.s_addr
+ };
+
+ return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
+}
+
+/*
+ * Compares the lookup keys of two neigh entries: interface, both address
+ * families and the gateway address.
+ * The fields are compared one by one, so nn_flags (which changes while an
+ * entry is hashed, see NN_FLAG_FB_LINKED) and structure padding cannot
+ * make two entries describing the same neighbor look different.
+ * Returns 1 if the keys match, 0 otherwise.
+ */
+static int
+cmp_neigh(const struct nhop_neigh *_one, const struct nhop_neigh *_two)
+{
+
+	if (_one->nn_ifp != _two->nn_ifp ||
+	    _one->nn_neigh_family != _two->nn_neigh_family ||
+	    _one->nn_upper_family != _two->nn_upper_family)
+		return (0);
+
+	switch (_one->nn_neigh_family) {
+	case AF_INET:
+		/* Only the IPv4 part of the address union is meaningful */
+		return (_one->nn_addr4.s_addr == _two->nn_addr4.s_addr);
+	default:
+		/* AF_INET6 and anything else: compare the whole union */
+		return (memcmp(&_one->nn_addr6, &_two->nn_addr6,
+		    sizeof(_one->nn_addr6)) == 0);
+	}
+}
+
+/*
+ * Searches the neigh hash of @ctl for an entry matching the key in @nn.
+ * Returns the entry found or NULL; nothing is referenced or locked here,
+ * all callers in this file hold the control lock around the call.
+ */
+static struct nhop_neigh *
+find_neigh(struct nn_control *ctl, const struct nhop_neigh *nn)
+{
+ struct nhop_neigh *nn_ret;
+
+ CHT_SLIST_FIND_BYOBJ(&ctl->nn_head, nhop_neighs, nn, nn_ret);
+ return (nn_ret);
+}
+
+/*
+ * Returns true if the neigh hash of @ctl contains an entry matching
+ * @nn_base. The lookup runs under the read lock; the result may be stale
+ * as soon as the lock is dropped.
+ */
+static bool
+has_neigh(struct nn_control *ctl, const struct nhop_neigh *nn_base)
+{
+ CTL_TRACKER;
+ bool result;
+
+ CTL_RLOCK(ctl);
+ result = find_neigh(ctl, nn_base) != NULL;
+ CTL_RUNLOCK(ctl);
+
+ return (result);
+}
+
+/*
+ * Tries to resize neighbor hash to the value specified by @new_num_buckets.
+ * The new bucket array is allocated without sleeping; on allocation
+ * failure the hash is left unchanged.
+ */
+static void
+resize_neigh_hash(struct nn_control *ctl, uint32_t new_num_buckets)
+{
+ size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_num_buckets);
+ void *nn_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
+ if (nn_ptr == NULL) {
+ /* The allocation has failed. */
+ RT_LOG(LOG_NOTICE, "neigh hash resize to %u has failed", new_num_buckets);
+ return;
+ }
+
+ CTL_WLOCK(ctl);
+ RT_LOG(LOG_DEBUG, "going to resize neigh hash: %u -> %u",
+ ctl->nn_head.hash_size, new_num_buckets);
+ CHT_SLIST_RESIZE(&ctl->nn_head, nhop_neighs, nn_ptr, new_num_buckets);
+ CTL_WUNLOCK(ctl);
+
+ /* NOTE(review): presumably the macro leaves the array to free in nn_ptr */
+ if (nn_ptr != NULL)
+ free(nn_ptr, M_NHOP);
+}
+
+/*
+ * Tells whether nexthop @nh should be attached to an LLE/NDP neighbor.
+ * That is the case when the egress interface type is one of bridge,
+ * ethernet, infiniband or L2 vlan, and the nexthop has NHF_GATEWAY or
+ * NHF_HOST set.
+ * Returns true if both conditions hold.
+ */
+bool
+nhop_need_neigh(const struct nhop_object *nh)
+{
+
+	switch (nh->nh_ifp->if_type) {
+	case IFT_BRIDGE:
+	case IFT_ETHER:
+	case IFT_INFINIBAND:
+	case IFT_L2VLAN:
+		break;
+	default:
+		return (false);
+	}
+
+	return ((nh->nh_flags & (NHF_GATEWAY|NHF_HOST)) != 0);
+}
+
+/*
+ * Fills in nhop_neigh data based on the nexthop specified by @nh_priv.
+ * Sets the key fields (interface, families, gateway address) and
+ * initializes the nexthop list and the mutex of @nn. The remaining fields
+ * are not touched, so @nn is expected to be zeroed by the caller.
+ * As the mutex is initialized here, @nn has to go through
+ * NN_LOCK_DESTROY() before its memory is released.
+ */
+static void
+init_neigh(struct nhop_neigh *nn, const struct nhop_priv *nh_priv)
+{
+ const struct nhop_object *nh = nh_priv->nh;
+
+ nn->nn_ifp = nh->nh_ifp;
+ nn->nn_neigh_family = nh->gw_sa.sa_family;
+ nn->nn_upper_family = nh_priv->nh_family;
+ /* For any other gateway family the address is left as is */
+ switch (nn->nn_neigh_family) {
+ case AF_INET:
+ nn->nn_addr4 = nh->gw4_sa.sin_addr;
+ break;
+ case AF_INET6:
+ nn->nn_addr6 = nh->gw6_sa.sin6_addr;
+ break;
+ }
+ TAILQ_INIT(&nn->nn_nhops);
+ NN_LOCK_INIT(nn);
+}
+
+/*
+ * Destroys the mutex of @nn and frees the entry.
+ * @nn must have been allocated from M_NHOP.
+ */
+static void
+free_neigh(struct nhop_neigh *nn)
+{
+ NN_LOCK_DESTROY(nn);
+ free(nn, M_NHOP);
+}
+
+/*
+ * Looks up the llentry matching the gateway of the nexthop @nh_priv in
+ * the lltable of the nexthop interface.
+ * Only AF_INET and AF_INET6 gateways are handled; NULL is returned for
+ * other families, when the interface has no per-family data, or when no
+ * entry is found.
+ */
+static struct llentry *
+find_lle(struct nhop_priv *nh_priv)
+{
+ void *afdata_ptr;
+ struct llentry *lle = NULL;
+ struct lltable *llt = NULL;
+ struct nhop_object *nh = nh_priv->nh;
+
+ /* Pick the lltable matching the gateway address family */
+ switch (nh->gw_sa.sa_family) {
+ case AF_INET:
+ afdata_ptr = nh->nh_ifp->if_afdata[AF_INET];
+ if (afdata_ptr != NULL)
+ llt = ((struct in_ifinfo *)afdata_ptr)->ii_llt;
+ break;
+ case AF_INET6:
+ afdata_ptr = nh->nh_ifp->if_afdata[AF_INET6];
+ if (afdata_ptr != NULL)
+ llt = ((struct in6_ifextra *)afdata_ptr)->lltable;
+ break;
+ }
+
+ if (llt != NULL)
+ lle = lla_lookup(llt, LLE_UNLOCKED, &nh->gw_sa);
+ if (lle != NULL) {
+ /* Nexthop family differs from the gateway one: look up the entry for that family */
+ if (nh_priv->nh_family != nh->gw_sa.sa_family)
+ lle = llentry_lookup_family(lle, nh_priv->nh_family);
+ }
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+ char nhbuf[48], lbuf[48];
+ FIB_NH_LOG(LOG_DEBUG, nh, "nhop %s: mapped to lle %s",
+ nhop_print_buf(nh, nhbuf, sizeof(nhbuf)),
+ lle ? lle_print_buf(lle, nh->nh_ifp, nh->gw_sa.sa_family, lbuf, sizeof(lbuf)) : "NULL");
+#endif
+ return (lle);
+}
+
+/*
+ * Links nexthop @nh_priv to the nexthop neighbor hash table and tries
+ * to fill in L2 nexthop prepend.
+ * A new neigh entry is allocated up front; it is inserted if no matching
+ * entry exists and freed otherwise.
+ * Returns true on successful linkage, false if the allocation failed.
+ */
+bool
+nhop_link_neigh(struct nhop_priv *nh_priv)
+{
+ uint32_t num_buckets_new;
+ struct nn_control *ctl = &V_nn_control;
+ struct nhop_neigh *nn = NULL, *nn_new;
+
+ NET_EPOCH_ASSERT();
+
+ /*
+ * Most llentries have at most one nexthop attached.
+ * Thus, assume we'll be inserting a new record.
+ */
+
+ nn_new = malloc(sizeof(struct nhop_neigh), M_NHOP, M_NOWAIT | M_ZERO);
+ if (nn_new == NULL)
+ return (false);
+ init_neigh(nn_new, nh_priv);
+
+ /* Try to calculate the prepend */
+ struct llentry *lle = find_lle(nh_priv);
+ if (lle != NULL)
+ update_prepend_ptr(nh_priv->nh, lle);
+
+ CTL_WLOCK(ctl);
+
+ /*
+ * Check if we need to resize the hash.
+ * The macro below returns either the new size or 0
+ * if resize is not required.
+ */
+ num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nn_head);
+
+ /* Check if already exists */
+ CHT_SLIST_FIND_BYOBJ(&ctl->nn_head, nhop_neighs, nn_new, nn);
+
+ if (nn == NULL) {
+ /* No such neigh yet: the preallocated entry becomes the hashed one */
+ nn = nn_new;
+ nn_new = NULL;
+ CHT_SLIST_INSERT_HEAD(&ctl->nn_head, nhop_neighs, nn);
+
+ /*
+ * XXXME: There can be a race when lle gets deleted after lookup
+ */
+ }
+ TAILQ_INSERT_TAIL(&nn->nn_nhops, nh_priv, nh_neigh_entry);
+
+ CTL_WUNLOCK(ctl);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+ char nhbuf[48], nnbuf[48];
+ FIB_NH_LOG(LOG_DEBUG, nh_priv->nh, "nhop %s linked to %s %s",
+ nhop_print_buf(nh_priv->nh, nhbuf, sizeof(nhbuf)),
+ nn_new == NULL ? "new" : "existing",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)));
+#endif
+
+ /* An existing entry was reused: drop the unused preallocated one */
+ if (nn_new != NULL)
+ free_neigh(nn_new);
+
+ /* The resize allocates memory and retakes the lock, so do it unlocked */
+ if (num_buckets_new > 0)
+ resize_neigh_hash(ctl, num_buckets_new);
+
+ return (true);
+}
+
+/*
+ * Unlinks nexthop specified by @nh_priv data from its neigh entry.
+ * When the last nexthop is detached, the neigh entry is taken off the
+ * feedback list (if it is on it), removed from the hash and freed.
+ */
+void
+nhop_unlink_neighbor(struct nhop_priv *nh_priv)
+{
+	struct nn_control *ctl = &V_nn_control;
+	uint32_t num_buckets_new;
+	struct nhop_neigh *nn, *nn_del = NULL, nn_base = {};
+
+	/* Build the on-stack lookup key; this also initializes its mutex */
+	init_neigh(&nn_base, nh_priv);
+
+	CTL_WLOCK(ctl);
+
+	nn = find_neigh(ctl, &nn_base);
+	if (nn != NULL) {
+		TAILQ_REMOVE(&nn->nn_nhops, nh_priv, nh_neigh_entry);
+		if (TAILQ_EMPTY(&nn->nn_nhops)) {
+			/*
+			 * The entry is about to be freed: make sure the
+			 * feedback callout cannot reach it anymore.
+			 */
+			if (nn->nn_flags & NN_FLAG_FB_LINKED) {
+				nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+				TAILQ_REMOVE(&ctl->nn_feedback_list, nn,
+				    nn_feedback_entry);
+			}
+			CHT_SLIST_REMOVE(&ctl->nn_head, nhop_neighs, nn, nn_del);
+		}
+	}
+
+	/* Check if hash needs to be resized */
+	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nn_head);
+
+	CTL_WUNLOCK(ctl);
+
+	/* Pair the mutex initialization done by init_neigh() on the key */
+	NN_LOCK_DESTROY(&nn_base);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char nhbuf[48], nnbuf[48];
+	FIB_NH_LOG(LOG_DEBUG, nh_priv->nh, "nhop %s unlinked from the neigh %s%s",
+	    nhop_print_buf(nh_priv->nh, nhbuf, sizeof(nhbuf)),
+	    neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+	    nn_del == NULL ? "" : " (last entry)");
+#endif
+
+	if (nn_del != NULL)
+		free_neigh(nn_del);
+
+	if (num_buckets_new > 0)
+		resize_neigh_hash(ctl, num_buckets_new);
+}
+
+/*
+ * Updates nhop @nh L2 prepend data with the pre-calculated prepend
+ * in @lle. If @lle contains no valid data, removes an existing L2 prepend.
+ * The prepend is also removed when a buffer for the new data cannot be
+ * allocated.
+ */
+static void
+update_prepend_ptr(struct nhop_object *nh, const struct llentry *lle)
+{
+	void *prepend = NULL;
+	int prepend_len = 0;
+
+	if (lle->r_flags & RLLE_VALID) {
+		prepend_len = lle->r_hdrlen;
+		prepend = nhop_alloc_prepend(prepend_len);
+		if (prepend != NULL)
+			memcpy(prepend, lle->r_linkdata, prepend_len);
+	}
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char nhbuf[48], lbuf[48];
+	FIB_NH_LOG(LOG_DEBUG, nh, "nhop %s sync L2 from %s",
+	    nhop_print_buf(nh, nhbuf, sizeof(nhbuf)),
+	    lle_print_buf(lle, nh->nh_ifp, nh->gw_sa.sa_family, lbuf, sizeof(lbuf)));
+#endif
+
+	/*
+	 * A false result means the nexthop was left untouched, so the new
+	 * buffer was never attached to it and has to be released here.
+	 */
+	if (!nhop_update_prepend(nh, prepend, prepend_len) && prepend != NULL)
+		nhop_free_prepend(prepend);
+}
+
+/*
+ * Hook called by the LLE subsystem notifying of the changed L2 prepend
+ * for the @lle entry.
+ * Function searches the matching neigh entry and updates NH L2 prepend
+ * for all of the registered nexthops.
+ * Has to be called within the network epoch; does nothing when the
+ * current VNET is dying.
+ */
+void
+nhops_update_neigh(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ CTL_TRACKER;
+ struct nn_control *ctl = &V_nn_control;
+
+ NET_EPOCH_ASSERT();
+
+ if (VNET_IS_DYING(curvnet))
+ return;
+
+ /*
+ * Lookup key. The whole address union is copied regardless of @family.
+ * NOTE(review): for AF_INET this assumes the unused bytes of r_l3addr
+ * are zero - confirm against the llentry allocation.
+ */
+ struct nhop_neigh nn_base = {
+ .nn_ifp = ifp,
+ .nn_upper_family = lle_get_upper_family(lle, family),
+ .nn_neigh_family = family,
+ .nn_addr6 = lle->r_l3addr.addr6,
+ };
+
+ bool matched = has_neigh(ctl, &nn_base);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char lbuf[48];
+ RT_LOG(LOG_DEBUG2, "L2 prepend update from %s (matched: %s)",
+ lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf)),
+ matched ? "true" : "false");
+#endif
+
+ if (!matched)
+ return;
+
+ /* The entry may have gone since has_neigh(), so look it up again */
+ CTL_RLOCK(ctl);
+ struct nhop_neigh *nn = find_neigh(ctl, &nn_base);
+ if (nn != NULL) {
+ struct nhop_priv *nh_priv;
+
+ TAILQ_FOREACH(nh_priv, &nn->nn_nhops, nh_neigh_entry)
+ update_prepend_ptr(nh_priv->nh, lle);
+ }
+ CTL_RUNLOCK(ctl);
+}
+
+
+/*
+ * LLE validity.
+ * Both the ARP and ND state machines require datapath-liveness checking
+ * as a step of expiring an lle entry. Additionally, the ND state machine
+ * requires the exact timestamp of the first packet traversing the LLE after
+ * the liveness checking request, so it can execute check callouts less often
+ * (STALE -> DELAY -> PROBE).
+ *
+ * Thus, upon receiving the request to check dataplane liveness from the LLE
+ * layer, the code below adds the matching neigh entry to the feedback list
+ * and fires a per-VNET callout once per second, recording the first time
+ * a packet is seen traversing the entry.
+ *
+ * Neighs are removed from the list in 2 ways: the first is done by the callout
+ * upon recording the timestamp, the second is LLE code removing the matching
+ * LLE.
+ *
+ */
+
+/*
+ * Returns total count of all packets that traversed the nexthops
+ * registered in the @nn, i.e. the sum of their nh_pksent counters.
+ * Both callers in this file hold the control write lock while the
+ * nexthop list is walked.
+ */
+static uint64_t
+calc_pktsent(struct nhop_neigh *nn)
+{
+ uint64_t nn_packets = 0;
+ struct nhop_priv *nh_priv;
+
+ TAILQ_FOREACH(nh_priv, &nn->nn_nhops, nh_neigh_entry)
+ nn_packets += counter_u64_fetch(nh_priv->nh->nh_pksent);
+ return (nn_packets);
+}
+
+/*
+ * Callout that is called every second to check if the cumulative amount
+ * of packets traversing relevant neigh entries has changed. If the change
+ * is observed, records the change time and removes the entry from the list.
+ * The callout re-arms itself for as long as the feedback list is not empty.
+ *
+ * Note: removing nexthops from the neigh entry results in false positive.
+ * However, as the value is used to check if the underlying lle is still used,
+ * the worst that can happen, is that the entry will be kept slightly longer
+ * before the deletion.
+ *
+ * @_arg is the struct nn_control the callout was scheduled for.
+ */
+static void
+pktsent_callout(void *_arg)
+{
+	struct nn_control *ctl = (struct nn_control *)_arg;
+	struct nhop_neigh *nn, *nn_tmp;
+	bool empty;
+
+	CTL_WLOCK(ctl);
+
+	TAILQ_FOREACH_SAFE(nn, &ctl->nn_feedback_list, nn_feedback_entry, nn_tmp) {
+		if (nn->nn_packets != calc_pktsent(nn)) {
+			nn->nn_packets = 0;
+			nn->nn_hittime = time_uptime;
+			nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+			TAILQ_REMOVE(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+			char nnbuf[48];
+			/* time_t is not "unsigned long" everywhere: cast to match %lu */
+			RT_LOG(LOG_DEBUG2, "L2 neigh %s got datapath feedback at %lu",
+			    neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+			    (unsigned long)nn->nn_hittime);
+#endif
+		}
+	}
+	empty = TAILQ_EMPTY(&ctl->nn_feedback_list);
+	CTL_WUNLOCK(ctl);
+	if (!empty)
+		schedule_callout(ctl);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+	else
+		RT_LOG(LOG_DEBUG2, "datapath callout stopped");
+#endif
+}
+
+static void
+schedule_callout(struct nn_control *ctl)
+{
+	/* Arm the one-second feedback callout unless it is already pending. */
+	if (!callout_pending(&ctl->nn_feedback_callout))
+		callout_reset_sbt(&ctl->nn_feedback_callout, SBT_1S, 0,
+		    pktsent_callout, ctl, 0);
+}
+
+static void
+update_feedback_membership(struct ifnet *ifp, int family, const struct llentry *lle,
+ bool add)
+{
+ struct nn_control *ctl = &V_nn_control;
+ struct nhop_neigh *nn;
+ bool need_callout = false;
+ /* Links (@add) or unlinks (!@add) the neigh matching @lle on the feedback list. */
+ NET_EPOCH_ASSERT();
+
+ if (VNET_IS_DYING(curvnet))
+ return;
+ /* Lookup key built from @ifp, the address families and the L3 address of @lle. */
+ struct nhop_neigh nn_base = {
+ .nn_ifp = ifp,
+ .nn_upper_family = lle_get_upper_family(lle, family),
+ .nn_neigh_family = family,
+ .nn_addr6 = lle->r_l3addr.addr6,
+ };
+
+ /* Most LLEs have no mapped nhops, so bail out early. */
+ bool matched = has_neigh(ctl, &nn_base);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char lbuf[48];
+ lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf));
+ if (matched) {
+ RT_LOG(LOG_DEBUG2, "%s datapath feedback for %s", add ? "request" : "abort", lbuf);
+ } else {
+ RT_LOG(LOG_DEBUG3, "%s datapath feedback for %s (nomatch)", add ? "request" : "abort", lbuf);
+ }
+#endif
+
+ if (!matched)
+ return;
+ /* Look the entry up under the control write lock before modifying it. */
+ CTL_WLOCK(ctl);
+ nn = find_neigh(ctl, &nn_base);
+ if (nn != NULL) {
+ if (add) {
+ nn->nn_packets = calc_pktsent(nn); /* baseline that pktsent_callout() compares against */
+ nn->nn_hittime = 0; /* cleared until pktsent_callout() records a hit */
+ /* Link only once; the callout needs starting only if the list was empty. */
+ if (!(nn->nn_flags & NN_FLAG_FB_LINKED)) {
+ nn->nn_flags |= NN_FLAG_FB_LINKED;
+ need_callout = TAILQ_EMPTY(&ctl->nn_feedback_list);
+ TAILQ_INSERT_TAIL(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char llbuf[48], nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "added %s to datapath feedback for %s",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+ lle_print_buf(lle, ifp, family, llbuf, sizeof(llbuf)));
+#endif
+ }
+ } else {
+ /* Remove from the list, but only if it is currently linked. */
+ if (nn->nn_flags & NN_FLAG_FB_LINKED) {
+ nn->nn_flags &= ~NN_FLAG_FB_LINKED;
+ TAILQ_REMOVE(&ctl->nn_feedback_list, nn, nn_feedback_entry);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ char llbuf[48],
+ nnbuf[48];
+ RT_LOG(LOG_DEBUG2, "removed %s from datapath feedback for %s",
+ neigh_print_buf(nn, nnbuf, sizeof(nnbuf)),
+ lle_print_buf(lle, ifp, family, llbuf, sizeof(llbuf)));
+#endif
+ }
+ }
+ }
+ CTL_WUNLOCK(ctl);
+ if (need_callout) { /* started only after the lock has been dropped */
+ schedule_callout(ctl);
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+ RT_LOG(LOG_DEBUG2, "datapath callout started");
+#endif
+ }
+}
+
+void
+nhops_request_feedback(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ update_feedback_membership(ifp, family, lle, true); /* add=true: put the neigh matching @lle on the feedback list */
+}
+
+void
+nhops_stop_feedback(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+ update_feedback_membership(ifp, family, lle, false); /* add=false: take the neigh matching @lle off the feedback list */
+}
+
+/*
+ * Returns the time_uptime at which pktsent_callout() first noticed traffic on the
+ * neigh matching @lle after nhops_request_feedback(); 0 if none (or VNET dying).
+ */
+time_t
+nhops_get_hittime(struct ifnet *ifp, int family, const struct llentry *lle)
+{
+	struct nn_control *ctl = &V_nn_control;
+	struct nhop_neigh *nn;
+	time_t hittime = 0;
+	CTL_TRACKER;
+
+	NET_EPOCH_ASSERT();
+
+	if (VNET_IS_DYING(curvnet))
+		return (0);
+
+	struct nhop_neigh nn_base = {
+		.nn_ifp = ifp,
+		.nn_upper_family = lle_get_upper_family(lle, family),
+		.nn_neigh_family = family,
+		.nn_addr6 = lle->r_l3addr.addr6,
+	};
+
+	CTL_RLOCK(ctl);
+	nn = find_neigh(ctl, &nn_base);
+	if (nn != NULL)
+		hittime = nn->nn_hittime;
+	CTL_RUNLOCK(ctl);
+	/* time_t is not unsigned long on every arch, so it is logged via intmax_t. */
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+	if (nn != NULL) {
+		char lbuf[48], nnbuf[48];
+		RT_LOG(LOG_DEBUG2, "%s datapath feedback returned %jd from %s",
+		    lle_print_buf(lle, ifp, family, lbuf, sizeof(lbuf)),
+		    (intmax_t)hittime, neigh_print_buf(nn, nnbuf, sizeof(nnbuf)));
+	}
+#endif
+
+	return (hittime);
+}
Index: sys/net/route/nhop_utils.h
===================================================================
--- sys/net/route/nhop_utils.h
+++ sys/net/route/nhop_utils.h
@@ -139,6 +139,11 @@
for (_x = CHT_FIRST(_head, _i); _x; _x = _PX##_next(_x))
#define CHT_SLIST_FOREACH_END }
+#define CHT_SLIST_FOREACH_SAFE(_head, _PX, _x, _t) /* _t is fetched first, so the body may unlink _x */ \
+	for (uint32_t _i = 0; _i < (_head)->hash_size; _i++) { \
+	for (_x = CHT_FIRST(_head, _i); (_x) && (_t = _PX##_next(_x), 1); _x = _t)
+#define CHT_SLIST_FOREACH_SAFE_END } /* closes the per-bucket loop; ", 1" above keeps a NULL successor from skipping the last item */
+
#define CHT_SLIST_RESIZE(_head, _PX, _new_void_ptr, _new_hsize) \
uint32_t _new_idx; \
typeof((_head)->ptr) _new_ptr = (void *)_new_void_ptr; \
Index: sys/net/route/nhop_var.h
===================================================================
--- sys/net/route/nhop_var.h
+++ sys/net/route/nhop_var.h
@@ -80,12 +80,14 @@
uint32_t rt_flags; /* routing flags for the control plane */
/* nhop lookup comparison end */
uint32_t nh_idx; /* nexthop index */
+ uint32_t nh_priv_flags; /* non user-visible flags */
void *cb_func; /* function handling additional rewrite caps */
u_int nh_refcnt; /* number of references, refcount(9) */
u_int nh_linked; /* refcount(9), == 2 if linked to the list */
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the rnh */
struct nhop_priv *nh_next; /* hash table membership */
+ TAILQ_ENTRY(nhop_priv) nh_neigh_entry; /* neigh membership */
struct vnet *nh_vnet; /* vnet nhop belongs to */
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};
@@ -95,13 +97,22 @@
#define NH_IS_PINNED(_nh) ((!NH_IS_NHGRP(_nh)) && \
((_nh)->nh_priv->rt_flags & RTF_PINNED))
+#define NHF_PRIV_NEIGH 0x01 /* linked to a neighbor record */
+
/* nhop.c */
struct nhop_priv *find_nhop(struct nh_control *ctl,
const struct nhop_priv *nh_priv);
int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
+bool is_nhop_linked(struct nhop_priv *nh_priv);
/* nhop_ctl.c */
int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
+/* nhop_neigh.c */
+
+bool nhop_need_neigh(const struct nhop_object *nh);
+bool nhop_link_neigh(struct nhop_priv *nh_priv);
+void nhop_unlink_neighbor(struct nhop_priv *nh_priv);
+
#endif
Index: sys/net/route/route_ctl.c
===================================================================
--- sys/net/route/route_ctl.c
+++ sys/net/route/route_ctl.c
@@ -118,6 +118,9 @@
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define V_rtzone VNET(rtzone)
+/* Debug bits */
+SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
void
vnet_rtzone_init()
{
Index: sys/net/route/route_debug.h
===================================================================
--- /dev/null
+++ sys/net/route/route_debug.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 2021
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_ROUTE_DEBUG_H_
+#define _NET_ROUTE_DEBUG_H_
+
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+
+const char *rib_print_family(int family);
+
+static inline uint32_t
+nhop_get_fibnum(const struct nhop_object *nh)
+{
+ return (0); /* NOTE(review): @nh is ignored and 0 is always returned; looks like a placeholder, confirm */
+}
+
+/* DEBUG logic */
+#if defined(DEBUG_MOD_NAME) && defined(DEBUG_MAX_LEVEL)
+#define _DEBUG_PASS_MSG(_l) (OID_NAME >= (_l)) /* true when level _l is enabled for this module */
+/* Two-step expansion: DEBUG_MOD_NAME must be expanded before ## pastes it. */
+#define OID_NAME _OID_NAME(DEBUG_MOD_NAME)
+#define _OID_NAME(a) _OID_NAME_INDIRECT(a)
+#define _OID_NAME_INDIRECT(prefix) prefix##_debug_level
+/* Same indirection for #: yields the module name as a string literal. */
+#define SPREFIX _SPREFIX(DEBUG_MOD_NAME)
+#define _SPREFIX(a) __SPREFIX(a)
+#define __SPREFIX(a) #a
+
+/* Defines the per-module level variable and its sysctl under net.route.debug. */
+#define _DECLARE_DEBUG(_default_level) \
+	SYSCTL_DECL(_net_route_debug); \
+	static int OID_NAME = _default_level; \
+	SYSCTL_INT(_net_route_debug, OID_AUTO, OID_NAME,\
+	    CTLFLAG_RWTUN, \
+	    &(OID_NAME), 0, "debuglevel")
+
+#ifndef LOG_DEBUG2 /* extra verbosity level, defined only if not already provided */
+#define LOG_DEBUG2 8
+#endif
+#ifndef LOG_DEBUG3 /* most verbose level used by the gates in this header */
+#define LOG_DEBUG3 9
+#endif
+
+#define _output printf /* output routine used by _FIB_LOG and _RT_LOG */
+
+#define _FIB_LOG(_l, _fib, _fam, _fmt, ...) do { if (_DEBUG_PASS_MSG(_l)) { \
+	_output("[" SPREFIX "] %s.%u %s: " _fmt "\n", rib_print_family(_fam), _fib, __func__, ##__VA_ARGS__); \
+} } while (0) /* do/while (0): expands to one statement, safe inside an unbraced if/else */
+#define FIB_LOG(_l, _fib, _fam, _fmt, ...) FIB_LOG_##_l(_l, _fib, _fam, _fmt, ## __VA_ARGS__)
+/* FIB_LOG pastes the level token to pick FIB_LOG_<level>; FIB_NH_LOG takes fib and family from @_nh. */
+#define FIB_NH_LOG(_l, _nh, _fmt, ...) FIB_LOG_##_l(_l, nhop_get_fibnum(_nh), (_nh)->gw_sa.sa_family, _fmt, ## __VA_ARGS__)
+
+#define _RT_LOG(_l, _fmt, ...) do { if (_DEBUG_PASS_MSG(_l)) { \
+	_output("[" SPREFIX "] %s: " _fmt "\n", __func__, ##__VA_ARGS__); \
+} } while (0) /* do/while (0): expands to one statement, safe inside an unbraced if/else */
+#define RT_LOG(_l, _fmt, ...) RT_LOG_##_l(_l, _fmt, ## __VA_ARGS__) /* pastes the level token to pick RT_LOG_<level> */
+
+
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG3 /* DEBUG3 messages are compiled in only at DEBUG3 and above */
+#define FIB_LOG_LOG_DEBUG3 _FIB_LOG
+#define RT_LOG_LOG_DEBUG3 _RT_LOG
+#else /* compiled out: the log call expands to nothing */
+#define FIB_LOG_LOG_DEBUG3(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG3(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL>=LOG_DEBUG3 */
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG2 /* each gate maps a level to the real logger or to nothing */
+#define FIB_LOG_LOG_DEBUG2 _FIB_LOG
+#define RT_LOG_LOG_DEBUG2 _RT_LOG
+#else /* compiled out: the log call expands to nothing */
+#define FIB_LOG_LOG_DEBUG2(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG2(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL>=LOG_DEBUG2 */
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG
+#define FIB_LOG_LOG_DEBUG _FIB_LOG
+#define RT_LOG_LOG_DEBUG _RT_LOG
+#else /* compiled out: the log call expands to nothing */
+#define FIB_LOG_LOG_DEBUG(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_DEBUG(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL>=LOG_DEBUG */
+#if DEBUG_MAX_LEVEL>=LOG_INFO
+#define FIB_LOG_LOG_INFO _FIB_LOG
+#define RT_LOG_LOG_INFO _RT_LOG
+#else /* compiled out: the log call expands to nothing */
+#define FIB_LOG_LOG_INFO(_l, _fib, _fam, _fmt, ...)
+#define RT_LOG_LOG_INFO(_l, _fmt, ...)
+#endif /* DEBUG_MAX_LEVEL>=LOG_INFO */
+#define FIB_LOG_LOG_NOTICE _FIB_LOG /* NOTICE, ERR and WARNING have no compile-time gate */
+#define FIB_LOG_LOG_ERR _FIB_LOG
+#define FIB_LOG_LOG_WARNING _FIB_LOG
+#define RT_LOG_LOG_NOTICE _RT_LOG
+#define RT_LOG_LOG_ERR _RT_LOG
+#define RT_LOG_LOG_WARNING _RT_LOG
+
+
+#endif
+
+#endif
\ No newline at end of file
Index: sys/net/route/route_helpers.c
===================================================================
--- sys/net/route/route_helpers.c
+++ sys/net/route/route_helpers.c
@@ -571,3 +571,4 @@
return (NULL);
}
#endif
+
Index: sys/net/route/route_tables.c
===================================================================
--- sys/net/route/route_tables.c
+++ sys/net/route/route_tables.c
@@ -262,6 +262,8 @@
#ifdef FIB_ALGO
vnet_fib_init();
#endif
+ vnet_nhops_init_neigh();
+
RTABLES_LOCK_INIT();
RTABLES_LOCK();
@@ -306,6 +308,7 @@
#ifdef FIB_ALGO
vnet_fib_destroy();
#endif
+ vnet_nhop_destroy_neigh();
}
VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
rtables_destroy, 0);
Index: sys/net/route/route_var.h
===================================================================
--- sys/net/route/route_var.h
+++ sys/net/route/route_var.h
@@ -247,6 +247,8 @@
void nhops_init(void);
int nhops_init_rib(struct rib_head *rh);
void nhops_destroy_rib(struct rib_head *rh);
+void vnet_nhops_init_neigh(void);
+void vnet_nhop_destroy_neigh(void);
void nhop_ref_object(struct nhop_object *nh);
int nhop_try_ref_object(struct nhop_object *nh);
void nhop_ref_any(struct nhop_object *nh);
Index: sys/net/vnet.h
===================================================================
--- sys/net/vnet.h
+++ sys/net/vnet.h
@@ -240,6 +240,8 @@
extern struct vnet *vnet0;
#define IS_DEFAULT_VNET(arg) ((arg) == vnet0)
+#define VNET_IS_DYING(_vnet) \
+ ((_vnet)->vnet_shutdown && (_vnet)->vnet_state < SI_SUB_VNET_DONE)
#define CRED_TO_VNET(cr) (cr)->cr_prison->pr_vnet
#define TD_TO_VNET(td) CRED_TO_VNET((td)->td_ucred)
Index: sys/netinet/if_ether.c
===================================================================
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -1001,6 +1001,7 @@
if (la_tmp == NULL) {
arp_mark_lle_reachable(la);
LLE_WUNLOCK(la);
+ nhops_update_neigh(ifp, AF_INET, la);
} else {
/* Free newly-create entry and handle packet */
lltable_free_entry(LLTABLE(ifp), la);
@@ -1239,8 +1240,11 @@
lladdr_off) == 0)
return;
+ nhops_update_neigh(ifp, AF_INET, la);
+
/* Clear fast path feedback request if set */
llentry_mark_used(la);
+ nhops_stop_feedback(ifp, AF_INET, la);
}
arp_mark_lle_reachable(la);
Index: sys/netinet/ip_fastfwd.c
===================================================================
--- sys/netinet/ip_fastfwd.c
+++ sys/netinet/ip_fastfwd.c
@@ -433,6 +433,7 @@
ro.ro_flags |= RT_HAS_GW;
} else
gw = (const struct sockaddr *)dst;
+ route_set_prepend_nh(&ro, nh);
/*
* Handle redirect case.
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -301,6 +301,8 @@
ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0;
ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0;
ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
+
+ route_set_prepend_nh(ro, nh);
}
/*
Index: sys/netinet6/nd6.c
===================================================================
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -698,10 +698,10 @@
delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
break;
case ND6_LLINFO_REACHABLE:
- if (!ND6_LLINFO_PERMANENT(lle)) {
- ifp = lle->lle_tbl->llt_ifp;
+ ifp = lle->lle_tbl->llt_ifp;
+ if (!ND6_LLINFO_PERMANENT(lle))
delay = (long)ND_IFINFO(ifp)->reachable * hz;
- }
+ nhops_stop_feedback(ifp, AF_INET6, lle);
break;
case ND6_LLINFO_STALE:
@@ -1420,6 +1420,7 @@
/* Update data */
lltable_set_entry_addr(ifp, lle, buf, sz, off);
+ nhops_update_neigh(ifp, AF_INET6, lle);
struct llentry *child_lle;
CK_SLIST_FOREACH(child_lle, &lle->lle_children, lle_child_next) {
@@ -1429,6 +1430,7 @@
if (lltable_calc_llheader(ifp, fam, lladdr, buf, &sz, &off) == 0) {
/* success */
lltable_set_entry_addr(ifp, child_lle, buf, sz, off);
+ nhops_update_neigh(ifp, AF_INET6, child_lle);
child_lle->ln_state = ND6_LLINFO_REACHABLE;
}
LLE_WUNLOCK(child_lle);
@@ -2052,6 +2054,7 @@
if (ln_tmp == NULL) {
/* No existing lle, mark as new entry (6,7) */
is_newentry = 1;
+ nhops_update_neigh(ifp, AF_INET6, ln);
if (lladdr != NULL) { /* (7) */
nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
EVENTHANDLER_INVOKE(lle_event, ln,
Index: usr.bin/netstat/nhops.c
===================================================================
--- usr.bin/netstat/nhops.c
+++ usr.bin/netstat/nhops.c
@@ -312,8 +312,9 @@
xo_emit("{t:refcount/%*lu} ", wid_refcnt, nh->nh_refcount);
if (Wflag && nh->prepend_len) {
- char *prepend_hex = "AABBCCDDEE";
- xo_emit(" {:nhop-prepend/%*s}", wid_prepend, prepend_hex);
+ for (int i = 0; i < nh->prepend_len; i++)
+ snprintf(&buffer[i * 2], 3, "%02X", nh->nh_prepend[i]);
+ xo_emit(" {:nhop-prepend/%*s}", wid_prepend, buffer);
}
xo_emit("\n");
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Nov 20, 8:31 AM (13 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14734044
Default Alt Text
D33658.id100573.diff (47 KB)
Attached To
Mode
D33658: Pre-calculate L2 prepends for routes with gateway and avoid arp/nd lookup
Attached
Detach File
Event Timeline
Log In to Comment