Page MenuHomeFreeBSD

D36002.id108752.diff
No OneTemporary

D36002.id108752.diff

Index: sys/compat/linux/linux.c
===================================================================
--- sys/compat/linux/linux.c
+++ sys/compat/linux/linux.c
@@ -47,6 +47,7 @@
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
+#include <netlink/netlink.h>
#include <sys/un.h>
#include <netinet/in.h>
@@ -364,6 +365,8 @@
return (AF_IPX);
case LINUX_AF_APPLETALK:
return (AF_APPLETALK);
+ case LINUX_AF_NETLINK:
+ return (AF_NETLINK);
}
return (-1);
}
@@ -387,6 +390,8 @@
return (LINUX_AF_IPX);
case AF_APPLETALK:
return (LINUX_AF_APPLETALK);
+ case AF_NETLINK:
+ return (LINUX_AF_NETLINK);
}
return (-1);
}
@@ -514,6 +519,14 @@
}
}
+ if (bdom == AF_NETLINK) {
+ if (salen < sizeof(struct sockaddr_nl)) {
+ error = EINVAL;
+ goto out;
+ }
+ salen = sizeof(struct sockaddr_nl);
+ }
+
sa = (struct sockaddr *)kosa;
sa->sa_family = bdom;
sa->sa_len = salen;
Index: sys/compat/linux/linux_socket.c
===================================================================
--- sys/compat/linux/linux_socket.c
+++ sys/compat/linux/linux_socket.c
@@ -91,6 +91,8 @@
l_uint, struct msghdr *);
static int linux_set_socket_flags(int, int *);
+#define SOL_NETLINK 270
+
static int
linux_to_bsd_sockopt_level(int level)
{
@@ -2091,6 +2093,10 @@
case IPPROTO_TCP:
name = linux_to_bsd_tcp_sockopt(args->optname);
break;
+ case SOL_NETLINK:
+ level = SOL_SOCKET;
+ name = args->optname;
+ break;
default:
name = -1;
break;
Index: sys/kern/uipc_domain.c
===================================================================
--- sys/kern/uipc_domain.c
+++ sys/kern/uipc_domain.c
@@ -239,6 +239,29 @@
mtx_unlock(&dom_mtx);
}
+/*
+ * Unlinks the domain passed via @data from the global domains list.
+ * Domains that do not have DOMF_UNLOADABLE set are left in place.
+ */
+void
+domain_remove(void *data)
+{
+ struct domain *target = (struct domain *)data;
+ struct domain **linkp;
+
+ if ((target->dom_flags & DOMF_UNLOADABLE) == 0)
+ return;
+
+ mtx_lock(&dom_mtx);
+ /* Walk the links, list head first, until one points at @target */
+ for (linkp = &domains; *linkp != NULL; linkp = &(*linkp)->dom_next) {
+ if (*linkp == target) {
+ *linkp = target->dom_next;
+ break;
+ }
+ }
+ mtx_unlock(&dom_mtx);
+}
+
/* ARGSUSED*/
static void
domaininit(void *dummy)
Index: sys/modules/linux_common/Makefile
===================================================================
--- sys/modules/linux_common/Makefile
+++ sys/modules/linux_common/Makefile
@@ -16,6 +16,8 @@
EXPORT_SYMS+= linux_get_osname
EXPORT_SYMS+= linux_get_osrelease
EXPORT_SYMS+= linux_use_real_ifname
+EXPORT_SYMS+= linux_to_bsd_domain
+EXPORT_SYMS+= bsd_to_linux_domain
.if !defined(KERNBUILDDIR)
.warning Building Linuxulator outside of a kernel does not make sense
Index: sys/modules/netlink/Makefile
===================================================================
--- /dev/null
+++ sys/modules/netlink/Makefile
@@ -0,0 +1,7 @@
+# Builds the netlink kernel module (KMOD) from the sources in sys/netlink.
+.PATH: ${SRCTOP}/sys/netlink
+KMOD= netlink
+
+SRCS = netlink_module.c netlink_domain.c netlink_iface.c netlink_io.c \
+ netlink_message.c netlink_route.c netlink_nhop.c netlink_linux.c
+
+.include <bsd.kmod.mk>
Index: sys/net/route.h
===================================================================
--- sys/net/route.h
+++ sys/net/route.h
@@ -344,15 +344,17 @@
void *);
struct rt_addrinfo {
- int rti_addrs; /* Route RTF_ flags */
+ uint16_t rti_addrs; /* rti_info bitmask */
+ uint8_t rti_family; /* address family to operate on */
+ uint8_t rti_spare2;
int rti_flags; /* Route RTF_ flags */
struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */
struct ifaddr *rti_ifa; /* value of rt_ifa addr */
struct ifnet *rti_ifp; /* route interface */
rib_filter_f_t *rti_filter; /* filter function */
- void *rti_filterdata; /* filter parameters */
- u_long rti_mflags; /* metrics RTV_ flags */
- u_long rti_spare; /* Will be used for fib */
+ void *rti_filterdata; /* filter parameters */
+ uint32_t rti_mflags; /* metrics RTV_ flags */
+ uint32_t rti_fibnum; /* Will be used for fib */
struct rt_metrics *rti_rmx; /* Pointer to route metrics */
};
Index: sys/net/route/nhgrp_ctl.c
===================================================================
--- sys/net/route/nhgrp_ctl.c
+++ sys/net/route/nhgrp_ctl.c
@@ -598,7 +598,7 @@
*/
int
nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
- struct route_nhop_data *rnd)
+ struct nhgrp_object **pnhg)
{
struct nh_control *ctl = rh->nh_control;
struct nhgrp_priv *nhg_priv;
@@ -606,8 +606,7 @@
nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error);
if (nhg_priv != NULL)
- rnd->rnd_nhgrp = nhg_priv->nhg;
- rnd->rnd_weight = 0;
+ *pnhg = nhg_priv->nhg;
return (error);
}
@@ -718,14 +717,14 @@
* Returns pointer to array of nexthops with weights for
* given @nhg. Stores number of items in the array into @pnum_nhops.
*/
-struct weightened_nhop *
-nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops)
+const struct weightened_nhop *
+nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops)
{
- struct nhgrp_priv *nhg_priv;
+ const struct nhgrp_priv *nhg_priv;
KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));
- nhg_priv = NHGRP_PRIV(nhg);
+ nhg_priv = NHGRP_PRIV_CONST(nhg);
*pnum_nhops = nhg_priv->nhg_nh_count;
return (nhg_priv->nhg_nh_weights);
Index: sys/net/route/nhop.h
===================================================================
--- sys/net/route/nhop.h
+++ sys/net/route/nhop.h
@@ -176,6 +176,7 @@
struct rib_head;
uint32_t nhop_get_idx(const struct nhop_object *nh);
+uint32_t nhop_get_uidx(const struct nhop_object *nh);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);
struct vnet *nhop_get_vnet(const struct nhop_object *nh);
@@ -184,6 +185,8 @@
int nhop_get_neigh_family(const struct nhop_object *nh);
uint32_t nhop_get_fibnum(const struct nhop_object *nh);
+int nhop_clone_flags(const struct nhop_object *nh_orig, int nh_flags,
+ struct nhop_object **pnh);
#endif /* _KERNEL */
/* Kernel <> userland structures */
Index: sys/net/route/nhop_ctl.c
===================================================================
--- sys/net/route/nhop_ctl.c
+++ sys/net/route/nhop_ctl.c
@@ -364,13 +364,15 @@
nh_priv = *pnh_priv;
/* Give the protocols chance to augment the request data */
- dst = info->rti_info[RTAX_DST];
- netmask = info->rti_info[RTAX_NETMASK];
+ if (info != NULL) {
+ dst = info->rti_info[RTAX_DST];
+ netmask = info->rti_info[RTAX_NETMASK];
- error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh);
- if (error != 0) {
- uma_zfree(nhops_zone, nh_priv->nh);
- return (error);
+ error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh);
+ if (error != 0) {
+ uma_zfree(nhops_zone, nh_priv->nh);
+ return (error);
+ }
}
tmp_priv = find_nhop(rnh->nh_control, nh_priv);
@@ -450,26 +452,15 @@
return (0);
}
-/*
- * Creates new nexthop based on @nh_orig and augmentation data from @info.
- * Helper function used in the route changes, please see
- * alter_nhop_from_info() comments for more details.
- *
- * Returns:
- * 0 on success, filling @nh_ret with the desired nexthop object
- * errno otherwise
- */
-int
-nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig,
- struct rt_addrinfo *info, struct nhop_object **pnh)
+static struct nhop_object *
+clone_nhop(const struct nhop_object *nh_orig)
{
struct nhop_priv *nh_priv;
struct nhop_object *nh;
- int error;
-
- NET_EPOCH_ASSERT();
nh_priv = alloc_nhop_structure();
+ if (nh_priv == NULL)
+ return (NULL);
nh = nh_priv->nh;
/* Start with copying data from original nexthop */
@@ -486,15 +477,69 @@
nh->nh_flags = nh_orig->nh_flags;
memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len);
+ return (nh);
+}
+
+/*
+ * Creates new nexthop based on @nh_orig and augmentation data from @info.
+ * Helper function used in the route changes, please see
+ * alter_nhop_from_info() comments for more details.
+ *
+ * Must be called within the network epoch.
+ *
+ * Returns:
+ * 0 on success, filling @pnh with the desired nexthop object
+ * errno otherwise
+ */
+int
+nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig,
+ struct rt_addrinfo *info, struct nhop_object **pnh)
+{
+ struct nhop_priv *nh_priv;
+ struct nhop_object *nh;
+ int error;
+
+ NET_EPOCH_ASSERT();
+
+ nh = clone_nhop(nh_orig);
+ if (nh == NULL)
+ return (ENOMEM);
+ /* get_nhop() reads *pnh_priv, so nh_priv has to point at the clone */
+ nh_priv = nh->nh_priv;
+
error = alter_nhop_from_info(nh, info);
if (error != 0) {
- uma_zfree(nhops_zone, nh_priv->nh);
+ uma_zfree(nhops_zone, nh);
return (error);
}
+ /* The resulting nexthop is taken from nh_priv, passed by reference */
error = get_nhop(rnh, info, &nh_priv);
if (error == 0)
*pnh = nh_priv->nh;
+
+ return (error);
+}
+
+/*
+ * Creates a nexthop that copies @nh_orig with @nh_flags additionally set.
+ * When NHF_HOST is among @nh_flags, RTF_HOST is set in the control plane
+ * flags as well. The rib head is looked up from the fib number and upper
+ * family stored in the cloned nexthop.
+ *
+ * Must be called within the network epoch.
+ *
+ * Returns:
+ * 0 on success, filling @pnh with the resulting nexthop object
+ * errno otherwise
+ */
+int
+nhop_clone_flags(const struct nhop_object *nh_orig, int nh_flags,
+ struct nhop_object **pnh)
+{
+ struct nhop_object *nh;
+ struct nhop_priv *nh_priv;
+ struct rib_head *rnh;
+ int error;
+
+ NET_EPOCH_ASSERT();
+
+ nh = clone_nhop(nh_orig);
+ if (nh == NULL)
+ return (ENOMEM);
+ nh_priv = nh->nh_priv;
+
+ nh->nh_flags |= nh_flags;
+ if (nh_flags & NHF_HOST)
+ nh_priv->rt_flags |= RTF_HOST;
+
+ rnh = rt_tables_get_rnh(nh_priv->nh_fibnum, nh_priv->nh_upper_family);
+ /* The resulting nexthop is taken from nh_priv, passed by reference */
+ error = get_nhop(rnh, NULL, &nh_priv);
+ if (error == 0)
+ *pnh = nh_priv->nh;
+ return (error);
+}
@@ -735,6 +780,12 @@
return (nh->nh_priv->nh_idx);
}
+/*
+ * Returns the nh_uidx value stored in the private part of @nh.
+ */
+uint32_t
+nhop_get_uidx(const struct nhop_object *nh)
+{
+ return (nh->nh_priv->nh_uidx);
+}
+
enum nhop_type
nhop_get_type(const struct nhop_object *nh)
{
Index: sys/net/route/nhop_var.h
===================================================================
--- sys/net/route/nhop_var.h
+++ sys/net/route/nhop_var.h
@@ -78,6 +78,7 @@
uint8_t nh_neigh_family;/* neighbor address family */
uint16_t nh_type; /* nexthop type */
uint32_t rt_flags; /* routing flags for the control plane */
+ uint32_t nh_uidx; /* userland-provided index */
/* nhop lookup comparison end */
uint32_t nh_idx; /* nexthop index */
uint32_t nh_fibnum; /* nexthop fib */
@@ -104,5 +105,7 @@
/* nhop_ctl.c */
int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
+/* Second argument carries nexthop NHF_ flags, matching the definition */
+int nhop_clone_flags(const struct nhop_object *nh_orig, int nh_flags,
+ struct nhop_object **pnh);
#endif
Index: sys/net/route/route_ctl.h
===================================================================
--- sys/net/route/route_ctl.h
+++ sys/net/route/route_ctl.h
@@ -35,6 +35,8 @@
#ifndef _NET_ROUTE_ROUTE_CTL_H_
#define _NET_ROUTE_ROUTE_CTL_H_
+#include <sys/ck.h>
+
struct rib_cmd_info {
uint8_t rc_cmd; /* RTM_ADD|RTM_DEL|RTM_CHANGE */
uint8_t spare[3];
@@ -135,7 +137,7 @@
/* Multipath */
struct weightened_nhop;
-struct weightened_nhop *nhgrp_get_nhops(struct nhgrp_object *nhg,
+const struct weightened_nhop *nhgrp_get_nhops(const struct nhgrp_object *nhg,
uint32_t *pnum_nhops);
uint32_t nhgrp_get_count(struct rib_head *rh);
@@ -160,4 +162,31 @@
void rib_unsubscribe(struct rib_subscription *rs);
void rib_unsubscribe_locked(struct rib_subscription *rs);
+/*
+ * Event bridge: linked callbacks are notified about events raised by
+ * providers other than their own.
+ */
+
+/* Types of events */
+#define NLBR_EVENT_ROUTE 1
+
+/* Event providers */
+#define NLBR_PROVIDER_KERNEL 1
+#define NLBR_PROVIDER_RTSOCK 2
+#define NLBR_PROVIDER_NETLINK 3
+
+struct rib_event_bridge;
+/* Callback signature; @arg receives the reb_cb_arg of the notified bridge */
+typedef void rib_event_bridge_cb_t(uint32_t event_type, uint32_t fibnum,
+ const struct rt_addrinfo *info, const struct rib_cmd_info *rc, void *arg);
+
+struct rib_event_bridge {
+ rib_event_bridge_cb_t *reb_cb; /* callback to invoke on an event */
+ void *reb_cb_arg; /* passed as the last callback argument */
+ int reb_provider_id; /* events from this provider are not delivered */
+ CK_STAILQ_ENTRY(rib_event_bridge) reb_link; /* linkage in the bridge list */
+};
+/* Delivers an event to every linked bridge with a different provider id */
+void rib_bridge_generic_event(int provider_id, uint32_t event_type, uint32_t val1,
+ const void *ptr1, const void *ptr2);
+/* Delivers an NLBR_EVENT_ROUTE event for @fibnum */
+void rib_bridge_rt_event(int provider_id, uint32_t fibnum, const struct rt_addrinfo *info,
+ const struct rib_cmd_info *rc);
+/* Add @reb to / remove @reb from the list of notified bridges */
+void rib_bridge_link(struct rib_event_bridge *reb);
+void rib_bridge_unlink(struct rib_event_bridge *reb);
+
+
#endif
Index: sys/net/route/route_ctl.c
===================================================================
--- sys/net/route/route_ctl.c
+++ sys/net/route/route_ctl.c
@@ -60,7 +60,7 @@
#define DEBUG_MOD_NAME route_ctl
#define DEBUG_MAX_LEVEL LOG_DEBUG
#include <net/route/route_debug.h>
-_DECLARE_DEBUG(LOG_INFO);
+_DECLARE_DEBUG(LOG_DEBUG3);
/*
* This file contains control plane routing tables functions.
@@ -156,7 +156,7 @@
*/
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
nh = wn[0].nh;
@@ -1019,8 +1019,9 @@
{
int error = 0, found_idx = 0;
struct nhop_object *nh_orig = NULL, *nh_new;
- struct route_nhop_data rnd_new;
- struct weightened_nhop *wn = NULL, *wn_new;
+ struct route_nhop_data rnd_new = {};
+ const struct weightened_nhop *wn = NULL;
+ struct weightened_nhop *wn_new;
uint32_t num_nhops;
wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
@@ -1050,7 +1051,7 @@
wn_new[found_idx].nh = nh_new;
wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);
- error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new);
+ error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new.rnd_nhgrp);
nhop_free(nh_new);
free(wn_new, M_TEMP);
@@ -1395,7 +1396,7 @@
if (report) {
#ifdef ROUTE_MPATH
struct nhgrp_object *nhg;
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
if (NH_IS_NHGRP(nh)) {
nhg = (struct nhgrp_object *)nh;
@@ -1620,3 +1621,65 @@
RIB_WUNLOCK(rnh);
NET_EPOCH_EXIT(et);
}
+
+
+/* List of linked event bridges, walked by rib_bridge_generic_event() */
+CK_STAILQ_HEAD(rib_event_bridge_head, rib_event_bridge);
+static struct rib_event_bridge_head bridge_head;
+/*
+ * Held by rib_bridge_link() and rib_bridge_unlink() while modifying the list.
+ * NOTE(review): unlike bridge_head this is not static -- confirm whether
+ * any other file needs the symbol.
+ */
+struct mtx bridge_lock;
+
+/*
+ * Initializes the bridge list head and its mutex; registered via SYSINIT.
+ */
+static void
+rib_bridge_init(void)
+{
+ CK_STAILQ_INIT(&bridge_head);
+ mtx_init(&bridge_lock, "rib_event_bridge_lock", NULL, MTX_DEF);
+}
+SYSINIT(rib_bridge_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, rib_bridge_init, NULL);
+
+
+/*
+ * Walks the list of linked bridges and invokes the callback of each bridge
+ * whose provider id differs from @provider_id, passing @event_type, @val1,
+ * @ptr1, @ptr2 and the bridge's own callback argument.
+ *
+ * Must be called within the network epoch.
+ */
+void
+rib_bridge_generic_event(int provider_id, uint32_t event_type, uint32_t val1,
+ const void *ptr1, const void *ptr2)
+{
+ struct rib_event_bridge *reb;
+
+ NET_EPOCH_ASSERT();
+
+ CK_STAILQ_FOREACH(reb, &bridge_head, reb_link) {
+ /* NOTE(review): "HERE" looks like a leftover debugging message */
+ RT_LOG(LOG_DEBUG, "HERE reb %p %d", reb, reb->reb_provider_id);
+ /* Do not echo the event back to the provider that raised it */
+ if (reb->reb_provider_id != provider_id)
+ reb->reb_cb(event_type, val1, ptr1, ptr2, reb->reb_cb_arg);
+ }
+}
+
+/*
+ * Reports a route change described by @info and @rc in fib @fibnum to the
+ * linked bridges as an NLBR_EVENT_ROUTE event on behalf of @provider_id.
+ */
+void
+rib_bridge_rt_event(int provider_id, uint32_t fibnum, const struct rt_addrinfo *info,
+ const struct rib_cmd_info *rc)
+{
+ /* The detailed per-route message is only built when LOG_DEBUG3 is compiled in */
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG3
+ char rtbuf[INET6_ADDRSTRLEN + 5];
+ FIB_LOG(LOG_DEBUG3, fibnum, rt_get_family(rc->rc_rt), "received cmd %s for %s",
+ rib_print_cmd(rc->rc_cmd), rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)));
+#endif
+ /* NOTE(review): "HERE" looks like a leftover debugging message */
+ RT_LOG(LOG_DEBUG, "HERE %u", fibnum);
+ rib_bridge_generic_event(provider_id, NLBR_EVENT_ROUTE, fibnum, info, rc);
+}
+
+
+/*
+ * Inserts @reb at the head of the bridge list under bridge_lock.
+ */
+void
+rib_bridge_link(struct rib_event_bridge *reb)
+{
+ mtx_lock(&bridge_lock);
+ CK_STAILQ_INSERT_HEAD(&bridge_head, reb, reb_link);
+ mtx_unlock(&bridge_lock);
+ RT_LOG(LOG_DEBUG, "link %p", reb);
+}
+
+/*
+ * Removes @reb from the bridge list under bridge_lock.
+ * NOTE(review): nothing here waits for readers that are still walking the
+ * list -- confirm that callers delay freeing @reb accordingly.
+ */
+void
+rib_bridge_unlink(struct rib_event_bridge *reb)
+{
+ mtx_lock(&bridge_lock);
+ CK_STAILQ_REMOVE(&bridge_head, reb, rib_event_bridge, reb_link);
+ mtx_unlock(&bridge_lock);
+ RT_LOG(LOG_DEBUG, "unlink %p", reb);
+}
+
Index: sys/net/route/route_helpers.c
===================================================================
--- sys/net/route/route_helpers.c
+++ sys/net/route/route_helpers.c
@@ -290,7 +290,7 @@
void *cbdata)
{
uint32_t num_old, num_new;
- struct weightened_nhop *wn_old, *wn_new;
+ const struct weightened_nhop *wn_old, *wn_new;
struct weightened_nhop tmp = { NULL, 0 };
uint32_t idx_old = 0, idx_new = 0;
@@ -378,7 +378,7 @@
rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
void *cbdata)
{
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
struct rib_cmd_info rc_new;
Index: sys/net/route/route_var.h
===================================================================
--- sys/net/route/route_var.h
+++ sys/net/route/route_var.h
@@ -306,7 +306,7 @@
int nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
int nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn,
- int num_nhops, struct route_nhop_data *rnd);
+ int num_nhops, struct nhgrp_object **pnhg);
typedef bool nhgrp_filter_cb_t(const struct nhop_object *nh, void *data);
int nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src,
nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd);
Index: sys/net/rtsock.c
===================================================================
--- sys/net/rtsock.c
+++ sys/net/rtsock.c
@@ -718,7 +718,7 @@
if (!NH_IS_NHGRP(nh))
return (nh);
#ifdef ROUTE_MPATH
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
if (gw == NULL)
@@ -1126,6 +1126,7 @@
}
error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
if (error == 0) {
+ rib_bridge_rt_event(NLBR_PROVIDER_RTSOCK, fibnum, &info, &rc);
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(rc.rc_nh_new) ||
(rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
@@ -1147,6 +1148,7 @@
case RTM_DELETE:
error = rib_action(fibnum, RTM_DELETE, &info, &rc);
if (error == 0) {
+ rib_bridge_rt_event(NLBR_PROVIDER_RTSOCK, fibnum, &info, &rc);
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(rc.rc_nh_old) ||
(rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
@@ -2243,7 +2245,7 @@
nh = rt_get_raw_nhop(rt);
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
int error;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
Index: sys/netinet/in_fib.c
===================================================================
--- sys/netinet/in_fib.c
+++ sys/netinet/in_fib.c
@@ -196,7 +196,7 @@
{
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
for (int i = 0; i < num_nhops; i++) {
Index: sys/netinet6/in6_fib.c
===================================================================
--- sys/netinet6/in6_fib.c
+++ sys/netinet6/in6_fib.c
@@ -205,7 +205,7 @@
{
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
- struct weightened_nhop *wn;
+ const struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
for (int i = 0; i < num_nhops; i++) {
Index: sys/netlink/netlink.h
===================================================================
--- /dev/null
+++ sys/netlink/netlink.h
@@ -0,0 +1,233 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file contains structures and constants for RFC 3549 (Netlink)
+ * protocol. Some values have been taken from Linux implementation.
+ */
+
+#ifndef _NETLINK_LINUX_NETLINK_H_
+#define _NETLINK_LINUX_NETLINK_H_
+
+#ifndef _KERNEL
+#ifndef PF_NETLINK
+#define PF_NETLINK 38
+#endif
+#ifndef AF_NETLINK
+#define AF_NETLINK 38
+#endif
+#ifndef AF_MPLS
+#define AF_MPLS 39
+#endif
+#endif
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+struct sockaddr_nl {
+ uint8_t nl_len; /* total length */
+ sa_family_t nl_family; /* AF_NETLINK */
+ uint16_t nl_pad; /* zero */
+ uint32_t nl_pid; /* port ID */
+ uint32_t nl_groups; /* multicast groups mask */
+};
+
+#define SOL_NETLINK 270
+
+/* Currently supported socket options */
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3 /* XXX: not supported */
+#define NETLINK_BROADCAST_ERROR 4 /* XXX: not supported */
+#define NETLINK_NO_ENOBUFS 5 /* XXX: not supported */
+#define NETLINK_RX_RING 6 /* XXX: not supported */
+#define NETLINK_TX_RING 7 /* XXX: not supported */
+#define NETLINK_LISTEN_ALL_NSID 8 /* XXX: not supported */
+
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
+#define NETLINK_GET_STRICT_CHK 12 /* XXX: not supported */
+
+
+/*
+ * RFC 3549, 2.3.2 Netlink Message Header
+ */
+struct nlmsghdr {
+ uint32_t nlmsg_len; /* Length of message including header */
+ uint16_t nlmsg_type; /* Message type identifier */
+ uint16_t nlmsg_flags; /* Flags (NLM_F_) */
+ uint32_t nlmsg_seq; /* Sequence number */
+ uint32_t nlmsg_pid; /* Sending process port ID */
+};
+
+/*
+ * RFC 3549, 2.3.2.2 The ACK Netlink Message
+ */
+struct nlmsgerr {
+ int error;
+ struct nlmsghdr msg;
+};
+
+/*
+ * RFC 3549, 2.3.2 standard flag bits (nlm_flags)
+ */
+#define NLM_F_REQUEST 0x01 /* It is request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/*
+ * RFC 3549, 2.3.2 Additional flag bits for GET requests
+ */
+#define NLM_F_ROOT 0x100 /* Return the complete table */
+#define NLM_F_MATCH 0x200 /* Return all entries matching criteria */
+#define NLM_F_ATOMIC 0x400 /* Return an atomic snapshot */
+#define NLM_F_DUMP (NLM_F_ROOT | NLM_F_MATCH)
+
+/*
+ * RFC 3549, 2.3.2 Additional flag bits for NEW requests
+ */
+#define NLM_F_REPLACE 0x100 /* Replace existing matching config object */
+#define NLM_F_EXCL 0x200 /* Don't replace the object if exists */
+#define NLM_F_CREATE 0x400 /* Create if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS 0x200 /* extended ACK TLVs were included */
+
+/*
+ * RFC 3549, 2.3.2 standard message types (nlmsg_type).
+ */
+#define NLMSG_NOOP 0x1 /* Message is ignored. */
+#define NLMSG_ERROR 0x2 /* reply error code reporting */
+#define NLMSG_DONE 0x3 /* Message terminates a multipart message. */
+#define NLMSG_OVERRUN 0x4 /* overrun detected, data is lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+/*
+ * Definition of numbers assigned to the netlink subsystems.
+ */
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* (not used) */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* (not used) */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13 /* (not used) */
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages (not used) */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+
+
+#ifndef roundup2
+#define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
+#endif
+/*
+ * Generic helpers for walking length-prefixed netlink items.
+ * _DLEN/_LEN_MACRO are function-like macros returning the item length
+ * for the given item pointer.
+ */
+#define NL_ITEM_ALIGN_SIZE sizeof(uint32_t)
+#define NL_ITEM_ALIGN(_len) roundup2(_len, NL_ITEM_ALIGN_SIZE)
+#define NL_ITEM_DATA(_ptr, _off) ((void *)((char *)(_ptr) + (_off)))
+#define NL_ITEM_DATA_CONST(_ptr, _off) ((const void *)((const char *)(_ptr) + (_off)))
+
+/*
+ * True when @_len bytes can hold a header of @_hlen bytes and the item's
+ * declared length is at least @_hlen and fits into @_len.
+ */
+#define NL_ITEM_OK(_ptr, _len, _hlen, _DLEN) \
+ ((_len) >= (_hlen) && _DLEN(_ptr) >= (_hlen) && _DLEN(_ptr) <= (_len))
+/* Pointer to the item located _LEN_MACRO(_ptr) bytes after @_ptr */
+#define NL_ITEM_NEXT(_ptr, _LEN_MACRO) \
+ ((__typeof__(_ptr))NL_ITEM_DATA(_ptr, _LEN_MACRO(_ptr)))
+/* Consumes the current item: shrinks @_len and advances @_ptr */
+#define NL_ITEM_ITER(_ptr, _len, _LEN_MACRO) \
+ ((_len) -= _LEN_MACRO(_ptr), (_ptr) = NL_ITEM_NEXT(_ptr, _LEN_MACRO))
+
+
+#ifndef _KERNEL
+/* part of netlink(3) API */
+#define NLMSG_ALIGNTO NL_ITEM_ALIGN_SIZE
+#define NLMSG_ALIGN(_len) NL_ITEM_ALIGN(_len)
+#define NLMSG_HDRLEN ((int)sizeof(struct nlmsghdr))
+/* Message length for a payload of @_len bytes, header included */
+#define NLMSG_LENGTH(_len) ((_len) + NLMSG_HDRLEN)
+/* Same as NLMSG_LENGTH(), rounded up to the netlink alignment */
+#define NLMSG_SPACE(_len) NLMSG_ALIGN(NLMSG_LENGTH(_len))
+/* Pointer to the payload that follows the message header */
+#define NLMSG_DATA(_hdr) NL_ITEM_DATA(_hdr, NLMSG_HDRLEN)
+#define _NLMSG_LEN(_hdr) ((int)(_hdr)->nlmsg_len)
+#define _NLMSG_ALIGNED_LEN(_hdr) NLMSG_ALIGN(_NLMSG_LEN(_hdr))
+#define NLMSG_NEXT(_hdr, _len) NL_ITEM_ITER(_hdr, _len, _NLMSG_ALIGNED_LEN)
+#define NLMSG_OK(_hdr, _len) NL_ITEM_OK(_hdr, _len, NLMSG_HDRLEN, _NLMSG_LEN)
+#define NLMSG_PAYLOAD(_hdr,_len) (_NLMSG_LEN(_hdr) - NLMSG_SPACE((_len)))
+
+#else
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) (((len) + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1))
+#define NLMSG_HDRLEN ((int)NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#endif
+
+/*
+ * Base netlink attribute TLV header.
+ */
+struct nlattr {
+ uint16_t nla_len; /* Total attribute length */
+ uint16_t nla_type; /* Attribute type */
+};
+
+/*
+ *
+ * nla_type field encoding:
+ *
+ * 0 1
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |N|O| Attribute type |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * N - attribute contains other attributes
+ * O - encoded in network byte order
+ * Note: N & O are mutually exclusive
+ *
+ * Note: attribute type value scope normally is per-message
+ * or per message group.
+ */
+
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+/* Compatibility macro */
+#ifndef _KERNEL
+#define NLA_ALIGNTO NL_ITEM_ALIGN_SIZE
+#define NLA_ALIGN(_len) NL_ITEM_ALIGN(_len)
+#define NLA_HDRLEN ((int)sizeof(struct nlattr))
+#endif
+
+#endif
Index: sys/netlink/netlink_debug.h
===================================================================
--- /dev/null
+++ sys/netlink/netlink_debug.h
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2022
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETLINK_NETLINK_DEBUG_H_
+#define _NETLINK_NETLINK_DEBUG_H_
+
+#include <net/route/route_debug.h>
+
+/*
+ * Generic debug
+ * [nl_domain] func_name: debug text
+ */
+#define NL_DEBUG RT_DEBUG
+
+/*
+ * Logging for events specific for particular process
+ * Example: [nl_domain] PID 4834 fdump_sa: unsupported family: 45
+ *
+ * NL_RAW_PID_LOG() dispatches on the level token @_l to one of the
+ * NL_RAW_PID_LOG_<level> macros below, which either expand to
+ * _NL_RAW_PID_LOG() or to nothing when the level is compiled out.
+ */
+#define NL_RAW_PID_LOG(_l, _pid, _fmt, ...) NL_RAW_PID_LOG_##_l(_l, _pid, _fmt, ## __VA_ARGS__)
+#define _NL_RAW_PID_LOG(_l, _pid, _fmt, ...) if (_DEBUG_PASS_MSG(_l)) { \
+ _output("[" DEBUG_PREFIX_NAME "] PID %u %s: " _fmt "\n", _pid, __func__, ##__VA_ARGS__); \
+}
+
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG3
+#define NL_RAW_PID_LOG_LOG_DEBUG3 _NL_RAW_PID_LOG
+#else
+#define NL_RAW_PID_LOG_LOG_DEBUG3(_l, _pid, _fmt, ...)
+#endif
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG2
+#define NL_RAW_PID_LOG_LOG_DEBUG2 _NL_RAW_PID_LOG
+#else
+#define NL_RAW_PID_LOG_LOG_DEBUG2(_l, _pid, _fmt, ...)
+#endif
+#if DEBUG_MAX_LEVEL>=LOG_DEBUG
+#define NL_RAW_PID_LOG_LOG_DEBUG _NL_RAW_PID_LOG
+#else
+#define NL_RAW_PID_LOG_LOG_DEBUG(_l, _pid, _fmt, ...)
+#endif
+#if DEBUG_MAX_LEVEL>=LOG_INFO
+#define NL_RAW_PID_LOG_LOG_INFO _NL_RAW_PID_LOG
+#else
+#define NL_RAW_PID_LOG_LOG_INFO(_l, _pid, _fmt, ...)
+#endif
+#define NL_RAW_PID_LOG_LOG_NOTICE _NL_RAW_PID_LOG
+#define NL_RAW_PID_LOG_LOG_ERR _NL_RAW_PID_LOG
+#define NL_RAW_PID_LOG_LOG_WARNING _NL_RAW_PID_LOG
+
+
+
+#endif
Index: sys/netlink/netlink_domain.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_domain.c
@@ -0,0 +1,525 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This file contains socket and protocol bindings for netlink.
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/ck.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysent.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/netisr.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+
+#define DEBUG_MOD_NAME nl_domain
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+/*
+ * Default socket buffer reservations, passed to soreserve() in
+ * nl_pru_attach() and tunable at runtime through the sysctls below.
+ */
+static u_long nl_sendspace = NLSNDQ;
+SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0,
+ "Default netlink socket send space");
+
+/* NOTE(review): initialised from NLSNDQ as well - confirm this is intended. */
+static u_long nl_recvspace = NLSNDQ;
+SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0,
+ "Default netlink socket receive space");
+
+/*
+ * Searches the per-VNET list of bound ports for the pcb that owns @port_id.
+ * Callers in this file hold the CTL lock (read or write) around the call.
+ * Returns the owning nlpcb, or NULL when the port is not in use.
+ */
+static struct nlpcb *
+nl_port_lookup(uint32_t port_id)
+{
+	struct nlpcb *found = NULL;
+	struct nlpcb *cur;
+
+	CK_LIST_FOREACH(cur, &V_nl_ctl->ctl_port_head, nl_port_next) {
+		if (cur->nl_port != port_id)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return (found);
+}
+
+/*
+ * Replaces the multicast group mask of @nlp with @nl_groups, logging the
+ * old and the new value. Takes no lock itself; callers in this file
+ * invoke it with the CTL write lock held.
+ */
+static void
+nl_update_groups_locked(struct nlpcb *nlp, uint32_t nl_groups)
+{
+ /* Update group mask */
+ RT_LOG(LOG_DEBUG2, "socket %p, groups 0x%X -> 0x%X",
+ nlp->nl_socket, nlp->nl_groups, nl_groups);
+ nlp->nl_groups = nl_groups;
+}
+
+/*
+ * Chooses a port id for a socket that asked for automatic assignment.
+ *
+ * An application can open multiple netlink sockets. The pid of the current
+ * process is tried first. If it is already taken, up to 16 random
+ * candidates are drawn from [4 * 65536, 5 * 65536), a range intended to
+ * avoid clashing with pids.
+ *
+ * If no free candidate is found, the pid is returned anyway; the caller
+ * (nl_bind_locked()) repeats the lookup and fails with EADDRINUSE.
+ */
+static uint32_t
+nl_find_port(void)
+{
+	if (nl_port_lookup(curproc->p_pid) == NULL)
+		return (curproc->p_pid);
+	for (int i = 0; i < 16; i++) {
+		uint32_t nl_port = (arc4random() % 65536) + 65536 * 4;
+
+		if (nl_port_lookup(nl_port) == NULL)
+			return (nl_port);
+		RT_LOG(LOG_DEBUG3, "tried %u", nl_port);
+	}
+	return (curproc->p_pid);
+}
+
+/*
+ * Binds @nlp to the port and groups described by @snl.
+ *
+ * For an already active pcb only a request naming the same port is
+ * accepted (EINVAL otherwise). For an inactive pcb, nl_pid == 0 requests
+ * automatic selection via nl_find_port() and the chosen value is written
+ * back into @snl; EADDRINUSE is returned if the port is already bound.
+ * On success the group mask is replaced with snl->nl_groups.
+ *
+ * Callers in this file hold both the CTL write lock and the pcb lock.
+ */
+static int
+nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl)
+{
+ if (nlp->nl_active) {
+ if (nlp->nl_port != snl->nl_pid) {
+ RT_LOG(LOG_DEBUG,
+ "bind() failed: program pid %d "
+ "is different from provided pid %d",
+ nlp->nl_port, snl->nl_pid);
+ return (EINVAL); // XXX: better error
+ }
+ } else {
+ if (snl->nl_pid == 0)
+ snl->nl_pid = nl_find_port();
+ if (nl_port_lookup(snl->nl_pid) != NULL)
+ return (EADDRINUSE);
+ nlp->nl_port = snl->nl_pid;
+ nlp->nl_active = true;
+ /* Publish the pcb on the bound-port list used by nl_port_lookup(). */
+ CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next);
+ }
+ nl_update_groups_locked(nlp, snl->nl_groups);
+
+ return (0);
+}
+
+/*
+ * pru_attach handler: creates the netlink pcb for a new socket.
+ *
+ * Refuses new sockets while the module is unloading, lazily creates the
+ * per-VNET control state, validates @proto, reserves socket buffer space,
+ * allocates and initialises the pcb together with its private taskqueue,
+ * links it on the per-VNET pcb list and marks the socket connected.
+ * Returns 0 or an errno value.
+ */
+static int
+nl_pru_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct nlpcb *nlp;
+ int error;
+
+ /* Remember whether the creating process runs under the Linux ABI. */
+ bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX;
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d%s: attaching socket to netlink proto %d",
+ so, curproc->p_pid, is_linux ? "(linux)" : "", proto);
+
+ if (__predict_false(netlink_unloading != 0))
+ return (EAFNOSUPPORT);
+
+ /* Create per-VNET state on first socket init */
+ if (V_nl_ctl == NULL)
+ vnet_nl_ctl_init();
+ KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed"));
+ MPASS(sotonlpcb(so) == NULL);
+
+ error = nl_verify_proto(proto);
+ if (error != 0)
+ return (error);
+
+ nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO);
+ error = soreserve(so, nl_sendspace, nl_recvspace);
+ if (error != 0) {
+ free(nlp, M_PCB);
+ return (error);
+ }
+ so->so_pcb = (void *)nlp;
+ nlp->nl_socket = so;
+ nlp->nl_proto = proto;
+ nlp->nl_process_id = curproc->p_pid;
+ nlp->nl_linux = is_linux;
+ NLP_LOCK_INIT(nlp);
+ refcount_init(&nlp->nl_refcount, 1);
+
+ /* One taskqueue with a single thread per socket; it runs nl_taskqueue_handler. */
+ nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK,
+ taskqueue_thread_enqueue, &nlp->nl_taskqueue);
+ TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp);
+ taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT,
+ "netlink_socket (PID %u)", nlp->nl_process_id);
+
+ CTL_WLOCK();
+ CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_pcb_head, nlp, nl_next);
+ CTL_WUNLOCK();
+
+ soisconnected(so);
+
+ return (0);
+}
+
+/*
+ * pru_abort handler: logs the call and marks the socket disconnected.
+ * The pcb itself is left untouched here.
+ */
+static void
+nl_pru_abort(struct socket *so)
+{
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ MPASS(sotonlpcb(so) != NULL);
+ soisdisconnected(so);
+}
+
+/*
+ * pru_bind handler: binds the socket to the port/groups given in @nam.
+ * Rejects addresses whose nl_len is not sizeof(struct sockaddr_nl) with
+ * EINVAL; otherwise performs the bind under the CTL write lock and the
+ * pcb lock and returns the result of nl_bind_locked().
+ */
+static int
+nl_pru_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct nlpcb *nlp = sotonlpcb(so);
+ struct sockaddr_nl *snl = (struct sockaddr_nl *)nam;
+ int error;
+
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ if (snl->nl_len != sizeof(*snl)) {
+ RT_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so);
+ return (EINVAL);
+ }
+
+ CTL_WLOCK();
+ NLP_LOCK(nlp);
+ error = nl_bind_locked(nlp, snl);
+ NLP_UNLOCK(nlp);
+ CTL_WUNLOCK();
+ /* snl->nl_pid may have been filled in by nl_bind_locked() for nl_pid == 0. */
+ RT_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so,
+ snl->nl_pid, snl->nl_groups, error);
+
+ return (error);
+}
+
+
+/*
+ * Binds @nlp to @port_id while keeping its current group mask.
+ * Builds a temporary sockaddr_nl and runs nl_bind_locked() under the CTL
+ * write lock and the pcb lock. Returns the nl_bind_locked() result.
+ */
+static int
+nl_assign_port(struct nlpcb *nlp, uint32_t port_id)
+{
+ struct sockaddr_nl snl = {
+ .nl_pid = port_id,
+ };
+ int error;
+
+ CTL_WLOCK();
+ NLP_LOCK(nlp);
+ /* Read the groups under the lock so the bind does not change them. */
+ snl.nl_groups = nlp->nl_groups;
+ error = nl_bind_locked(nlp, &snl);
+ NLP_UNLOCK(nlp);
+ CTL_WUNLOCK();
+
+ RT_LOG(LOG_DEBUG2, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error);
+ return (error);
+}
+
+/*
+ * nl_autobind_port binds an unused port id to @nlp.
+ * @nlp: pcb data for the netlink socket
+ * @candidate_id: first id to consider
+ *
+ * Tries up to 10 consecutive ids starting at @candidate_id. Each id is
+ * first checked under the CTL read lock; if it looks free, the bind is
+ * attempted through nl_assign_port(), which re-checks under the write
+ * lock and may still report EADDRINUSE if another socket won the race.
+ *
+ * Returns 0 on success, EADDRINUSE if every candidate was occupied, or
+ * another errno value reported by nl_assign_port().
+ */
+static int
+nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id)
+{
+	uint32_t port_id = candidate_id;
+	CTL_TRACKER;
+	bool exist;
+	/* Result when no candidate could even be attempted. */
+	int error = EADDRINUSE;
+
+	for (int i = 0; i < 10; i++) {
+		RT_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id);
+		CTL_RLOCK();
+		exist = nl_port_lookup(port_id) != NULL;
+		CTL_RUNLOCK();
+		if (!exist) {
+			error = nl_assign_port(nlp, port_id);
+			if (error != EADDRINUSE)
+				break;
+		}
+		port_id++;
+	}
+	RT_LOG(LOG_DEBUG2, "socket %p, autobind to %d, error: %d", nlp->nl_socket, port_id, error);
+	return (error);
+}
+
+/*
+ * pru_connect handler.
+ *
+ * Validates the length of the supplied address (EINVAL on mismatch),
+ * auto-binds the socket to a port derived from the calling process id if
+ * it has not been bound yet, and marks the socket connected. The
+ * destination in @nam is only logged; it is not stored.
+ * Returns 0 or the error reported by nl_autobind_port().
+ */
+static int
+nl_pru_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+	struct sockaddr_nl *snl = (struct sockaddr_nl *)nam;
+	struct nlpcb *nlp;
+
+	RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+	if (snl->nl_len != sizeof(*snl)) {
+		RT_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring connect()", so);
+		return (EINVAL);
+	}
+
+	nlp = sotonlpcb(so);
+	if (!nlp->nl_active) {
+		int error = nl_autobind_port(nlp, td->td_proc->p_pid);
+		if (error != 0) {
+			RT_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d", so, error);
+			return (error);
+		}
+	}
+	/* XXX: Handle socket flags & multicast */
+	soisconnected(so);
+
+	RT_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid);
+
+	return (0);
+}
+
+/*
+ * Final teardown of a pcb: takes the pcb lock, destroys it and frees the
+ * pcb memory. Called from destroy_socket_epoch().
+ */
+static void
+destroy_socket(struct nlpcb *nlp)
+{
+ /*
+  * NOTE(review): the lock is destroyed while held; presumably the
+  * underlying lock type permits that - confirm against NLP_LOCK_DESTROY.
+  */
+ NLP_LOCK(nlp);
+ NLP_LOCK_DESTROY(nlp);
+ free(nlp, M_PCB);
+}
+
+/*
+ * Epoch callback scheduled by nl_pru_detach(): recovers the pcb that embeds
+ * @ctx as its nl_epoch_ctx member and hands it to destroy_socket().
+ */
+static void
+destroy_socket_epoch(epoch_context_t ctx)
+{
+	destroy_socket(__containerof(ctx, struct nlpcb, nl_epoch_ctx));
+}
+
+
+/*
+ * pru_detach handler: disconnects the pcb from the socket and schedules
+ * its destruction.
+ *
+ * Order of operations: mark the pcb inactive so nl_receive_async() stops
+ * queueing work, drain and free the taskqueue, unlink the pcb from the
+ * port list (if it was bound) and from the pcb list, clear the
+ * socket<->pcb pointers, and finally defer the free to an epoch callback.
+ */
+static void
+nl_pru_detach(struct socket *so)
+{
+ MPASS(sotonlpcb(so) != NULL);
+ struct nlpcb *nlp;
+
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ nlp = sotonlpcb(so);
+
+ /* Mark as inactive so no new work can be enqueued */
+ NLP_LOCK(nlp);
+ bool was_active = nlp->nl_active;
+ nlp->nl_active = false;
+ NLP_UNLOCK(nlp);
+
+ /* Wait till all scheduled work has been completed */
+ taskqueue_drain_all(nlp->nl_taskqueue);
+ taskqueue_free(nlp->nl_taskqueue);
+
+ CTL_WLOCK();
+ NLP_LOCK(nlp);
+ /* Only bound pcbs were ever inserted on the port list. */
+ if (was_active) {
+ CK_LIST_REMOVE(nlp, nl_port_next);
+ RT_LOG(LOG_DEBUG2, "socket %p, unlinking bound pid %u", so, nlp->nl_port);
+ }
+ CK_LIST_REMOVE(nlp, nl_next);
+ /* nl_send_one() checks nl_socket for NULL under the pcb lock. */
+ nlp->nl_socket = NULL;
+ NLP_UNLOCK(nlp);
+ CTL_WUNLOCK();
+
+ so->so_pcb = NULL;
+
+ RT_LOG(LOG_DEBUG2, "socket %p, detached", so);
+
+ /* The pcb lists are CK lists; the free is deferred to an epoch callback. */
+ epoch_call(net_epoch_preempt, destroy_socket_epoch, &nlp->nl_epoch_ctx);
+}
+
+/*
+ * pru_disconnect handler: logs the call and always returns ENOTCONN.
+ */
+static int
+nl_pru_disconnect(struct socket *so)
+{
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ MPASS(sotonlpcb(so) != NULL);
+ return (ENOTCONN);
+}
+
+/*
+ * pru_peeraddr handler: no peer address is reported; @nam is left
+ * untouched and ENOTCONN is always returned.
+ */
+static int
+nl_pru_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ MPASS(sotonlpcb(so) != NULL);
+ return (ENOTCONN);
+}
+
+/*
+ * pru_shutdown handler: marks the socket as unable to send more data via
+ * socantsendmore() and returns 0.
+ */
+static int
+nl_pru_shutdown(struct socket *so)
+{
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ MPASS(sotonlpcb(so) != NULL);
+ socantsendmore(so);
+ return (0);
+}
+
+/*
+ * pru_sockaddr handler: returns the local address of the socket.
+ * Allocates a zeroed sockaddr_nl from M_SONAME, fills in length, family
+ * and the port stored in the pcb, and hands it to the caller through
+ * @nam. Always returns 0.
+ */
+static int
+nl_pru_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+	struct sockaddr_nl *addr;
+
+	addr = malloc(sizeof(*addr), M_SONAME, M_WAITOK | M_ZERO);
+	/* TODO: set other fields */
+	addr->nl_family = AF_NETLINK;
+	addr->nl_len = sizeof(*addr);
+	addr->nl_pid = sotonlpcb(so)->nl_port;
+
+	*nam = (struct sockaddr *)addr;
+	return (0);
+}
+
+/*
+ * pru_close handler: logs the call and marks the socket disconnected.
+ */
+static void
+nl_pru_close(struct socket *so)
+{
+ RT_LOG(LOG_DEBUG2, "socket %p, PID %d", so, curproc->p_pid);
+ MPASS(sotonlpcb(so) != NULL);
+ soisdisconnected(so);
+}
+
+/*
+ * Queues an outgoing (userland -> kernel) packet for asynchronous
+ * processing.
+ *
+ * Returns ENOBUFS if @m is NULL or if the first mbuf cannot be made to
+ * hold a full struct nlmsghdr via m_pullup(). Otherwise the packet is
+ * handed to nl_receive_async() and 0 is returned.
+ * NOTE(review): the return value of nl_receive_async() is ignored, so a
+ * drop on an inactive socket is not reported to the sender.
+ */
+static int
+nl_pru_output(struct mbuf *m, struct socket *so, ...)
+{
+
+ if (__predict_false(m == NULL ||
+ ((m->m_len < sizeof(struct nlmsghdr)) &&
+ (m = m_pullup(m, sizeof(struct nlmsghdr))) == NULL)))
+ return (ENOBUFS);
+ MPASS((m->m_flags & M_PKTHDR) != 0);
+
+ nl_receive_async(m, so);
+ return (0);
+}
+
+
+/*
+ * pru_send handler: passes the data mbuf chain @m to nl_pru_output().
+ *
+ * @flags, @nam and @td are not used. Control data is not supported by
+ * this handler; a non-NULL @control chain is released here so that it is
+ * not leaked, since nothing further down the path consumes it.
+ * Returns the result of nl_pru_output().
+ */
+static int
+nl_pru_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+    struct mbuf *control, struct thread *td)
+{
+	RT_LOG(LOG_DEBUG2, "sending message to kernel");
+	if (control != NULL)
+		m_freem(control);
+	return (nl_pru_output(m, so));
+}
+
+/*
+ * netlink usrreqs: maps the socket-layer user requests to the nl_pru_*
+ * handlers defined above. Referenced from netlinksw[].pr_usrreqs.
+ */
+static struct pr_usrreqs nl_usrreqs = {
+ .pru_abort = nl_pru_abort,
+ .pru_attach = nl_pru_attach,
+ .pru_bind = nl_pru_bind,
+ .pru_connect = nl_pru_connect,
+ .pru_detach = nl_pru_detach,
+ .pru_disconnect = nl_pru_disconnect,
+ .pru_peeraddr = nl_pru_peeraddr,
+ .pru_send = nl_pru_send,
+ .pru_shutdown = nl_pru_shutdown,
+ .pru_sockaddr = nl_pru_sockaddr,
+ .pru_close = nl_pru_close
+};
+
+/*
+ * Socket option handler (pr_ctloutput) for netlink sockets.
+ *
+ * SOPT_SET:
+ *   NETLINK_ADD_MEMBERSHIP / NETLINK_DROP_MEMBERSHIP - the int option
+ *     value is OR-ed into / masked out of the pcb group mask.
+ *   NETLINK_CAP_ACK / NETLINK_EXT_ACK - sets the matching NLF_* flag when
+ *     the int option value is non-zero, clears it otherwise.
+ * SOPT_GET:
+ *   NETLINK_LIST_MEMBERSHIPS - copies out the current group mask.
+ *
+ * Returns 0 on success, the sooptcopyin()/sooptcopyout() error if the
+ * option value cannot be transferred, or ENOPROTOOPT for anything else.
+ */
+static int
+nl_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+	struct nlpcb *nlp = sotonlpcb(so);
+	uint32_t flag, groups;
+	int optval, error = 0;
+	CTL_TRACKER;
+
+	RT_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ? "set" : "get",
+	    so, sopt->sopt_name);
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:
+		switch (sopt->sopt_name) {
+		case NETLINK_ADD_MEMBERSHIP:
+		case NETLINK_DROP_MEMBERSHIP:
+			/* Do not touch the groups if the value was not read. */
+			error = sooptcopyin(sopt, &optval, sizeof(optval),
+			    sizeof(optval));
+			if (error != 0)
+				break;
+
+			CTL_WLOCK();
+			if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP)
+				groups = nlp->nl_groups | optval;
+			else
+				groups = nlp->nl_groups & ~optval;
+			nl_update_groups_locked(nlp, groups);
+			CTL_WUNLOCK();
+			break;
+		case NETLINK_CAP_ACK:
+		case NETLINK_EXT_ACK:
+			/* Do not touch the flags if the value was not read. */
+			error = sooptcopyin(sopt, &optval, sizeof(optval),
+			    sizeof(optval));
+			if (error != 0)
+				break;
+
+			if (sopt->sopt_name == NETLINK_CAP_ACK)
+				flag = NLF_CAP_ACK;
+			else
+				flag = NLF_EXT_ACK;
+
+			CTL_WLOCK();
+			if (optval != 0)
+				nlp->nl_flags |= flag;
+			else
+				nlp->nl_flags &= ~flag;
+			CTL_WUNLOCK();
+			break;
+		default:
+			error = ENOPROTOOPT;
+		}
+		break;
+	case SOPT_GET:
+		switch (sopt->sopt_name) {
+		case NETLINK_LIST_MEMBERSHIPS:
+			CTL_RLOCK();
+			optval = nlp->nl_groups;
+			CTL_RUNLOCK();
+			error = sooptcopyout(sopt, &optval, sizeof(optval));
+			break;
+		default:
+			error = ENOPROTOOPT;
+		}
+		break;
+	default:
+		error = ENOPROTOOPT;
+	}
+
+	return (error);
+}
+
+/* Forward declaration: netlinksw[] and netlinkdomain reference each other. */
+static struct domain netlinkdomain;
+
+/*
+ * Protocol switch table for the netlink domain: a single SOCK_RAW entry
+ * wired to the output, socket option and user request handlers above.
+ */
+static struct protosw netlinksw[] = {
+ {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &netlinkdomain,
+ .pr_flags = PR_ATOMIC | PR_ADDR,
+ .pr_output = nl_pru_output,
+ .pr_ctloutput = nl_ctloutput,
+ .pr_usrreqs = &nl_usrreqs
+ },
+};
+
+/*
+ * Domain descriptor for PF_NETLINK, registered through DOMAIN_SET() below.
+ * dom_protoswNPROTOSW points one past the last netlinksw[] entry.
+ */
+static struct domain netlinkdomain = {
+ .dom_family = PF_NETLINK,
+ .dom_name = "netlink",
+ .dom_protosw = netlinksw,
+ .dom_flags = DOMF_UNLOADABLE,
+ .dom_protoswNPROTOSW = &netlinksw[sizeof(netlinksw) / sizeof(netlinksw[0])]
+};
+
+DOMAIN_SET(netlink);
Index: sys/netlink/netlink_iface.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_iface.c
@@ -0,0 +1,439 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/socketvar.h>
+#include <sys/ck.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+#include <netlink/netlink_route.h>
+
+#include <netinet6/scope6_var.h> /* scope deembedding */
+
+#define DEBUG_MOD_NAME nl_iface
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+/*
+ * State shared between a dump request handler and its per-object dump
+ * helpers in this file.
+ */
+struct netlink_walkargs {
+ struct nlmsg_state ns; /* message writer used for the replies */
+ struct rib_cmd_info rc; /* only rc_cmd is initialised by the handlers here */
+ struct nlmsghdr hdr; /* reply header template built from the request */
+ struct nlpcb *so; /* requesting socket's pcb */
+ uint32_t fibnum; /* not set by the handlers in this file */
+ int family; /* address family filter; 0 means no filtering */
+ int error;
+ int count; /* objects visited */
+ int dumped; /* objects successfully written */
+};
+
+/*
+ * Logs the name and numeric value of the attribute that could not be
+ * written and jumps to the enclosing function's "error" label.
+ */
+#define FAIL_ATTR(a) {\
+ RT_LOG(LOG_DEBUG, "failed writing attribute %s (%d)", #a, a); \
+ goto error; \
+}
+
+/*
+ * RTM_GETLINK request
+ * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0},
+ * {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
+ *
+ * Reply:
+ * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0},
+{{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"}
+
+[
+{{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"},
+{{nla_len=8, nla_type=IFLA_TXQLEN}, 1000},
+{{nla_len=5, nla_type=IFLA_OPERSTATE}, 6},
+{{nla_len=5, nla_type=IFLA_LINKMODE}, 0},
+{{nla_len=8, nla_type=IFLA_MTU}, 1500},
+{{nla_len=8, nla_type=IFLA_MIN_MTU}, 68},
+ {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000},
+{{nla_len=8, nla_type=IFLA_GROUP}, 0},
+{{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0},
+{{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1},
+{{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535},
+{{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536},
+{{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1},
+{{nla_len=5, nla_type=IFLA_CARRIER}, 1},
+{{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"},
+{{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2},
+{{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0},
+{{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1},
+{{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1},
+ */
+
+/*
+ * Produces the ifi_flags value reported for @ifp. Currently the interface
+ * flags are passed through unchanged.
+ */
+static unsigned
+ifp_flags_to_netlink(const struct ifnet *ifp)
+{
+	unsigned nl_flags = ifp->if_flags;
+
+	return (nl_flags);
+}
+
+/* Const-preserving pointer to the link-level address stored in a sockaddr_dl. */
+#define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen))
+/*
+ * Writes the raw address bytes of @sa as netlink attribute @attr.
+ *
+ * Supports AF_INET, AF_INET6 and AF_LINK. A NULL @sa or an unsupported
+ * family is skipped and reported as success (true). Otherwise returns
+ * the result of nla_put().
+ */
+static bool
+dump_sa(struct nlmsg_state *ns, int attr, const struct sockaddr *sa)
+{
+ uint32_t addr_len = 0;
+ const void *addr_data = NULL;
+ struct in6_addr addr6;
+
+ if (sa == NULL)
+ return (true);
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ addr_len = sizeof(struct in_addr);
+ addr_data = &((const struct sockaddr_in *)sa)->sin_addr;
+ break;
+ case AF_INET6:
+ /*
+  * addr_len is passed as the third argument only as scratch
+  * storage; whatever is written there is overwritten on the
+  * next line. The address written out is the copy in addr6.
+  */
+ in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len);
+ addr_len = sizeof(struct in6_addr);
+ addr_data = &addr6;
+ break;
+ case AF_LINK:
+ addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen;
+ addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa);
+ break;
+ default:
+ RT_LOG(LOG_DEBUG, "unsupported family: %d, skipping", sa->sa_family);
+ return (true);
+ }
+
+ return (nla_put(ns, attr, addr_len, addr_data));
+}
+
+/*
+ * Writes one NL_RTM_NEWLINK message describing @ifp into the writer in
+ * @wa, using pid, sequence number and flags from @hdr.
+ *
+ * wa->count is incremented for every call, wa->dumped only on success.
+ * Returns true on success. If any attribute cannot be written, the
+ * partially built message is aborted and false is returned.
+ *
+ * NOTE(review): the result of nlmsg_put() is not checked here, although
+ * rtnl_handle_getlink() treats a false return from it as failure.
+ */
+static bool
+dump_iface(struct netlink_walkargs *wa, struct ifnet *ifp, const struct nlmsghdr *hdr)
+{
+	struct ifinfomsg *ifinfo;
+
+	wa->count++;
+
+	int payload_len = sizeof(struct ifinfomsg);
+	nlmsg_put(&wa->ns, hdr->nlmsg_pid, hdr->nlmsg_seq, NL_RTM_NEWLINK,
+	    hdr->nlmsg_flags, payload_len);
+
+	ifinfo = nlmsg_reserve_object(&wa->ns, struct ifinfomsg);
+	ifinfo->ifi_family = AF_UNSPEC;
+	ifinfo->__ifi_pad = 0;
+	ifinfo->ifi_type = ifp->if_type; // ARPHDR
+	ifinfo->ifi_index = ifp->if_index;
+	ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
+	ifinfo->ifi_change = 0;
+
+	if (!nla_put_string(&wa->ns, IFLA_IFNAME, ifp->if_xname))
+		goto error;
+
+	uint8_t val = 0; // XXX: operstate?
+	if (!nla_put_u8(&wa->ns, IFLA_OPERSTATE, val))
+		goto error;
+
+	/* XXX: carrier */
+	if (!nla_put_u8(&wa->ns, IFLA_CARRIER, val))
+		goto error;
+
+	if (!nla_put_u8(&wa->ns, IFLA_PROTO_DOWN, val))
+		goto error;
+
+	if (!nla_put_u8(&wa->ns, IFLA_LINKMODE, val))
+		goto error;
+
+	/* Link addr */
+	if ((ifp->if_addr != NULL)) {
+		if (!dump_sa(&wa->ns, IFLA_ADDRESS, ifp->if_addr->ifa_addr))
+			goto error;
+	}
+
+	if (!nla_put_u32(&wa->ns, IFLA_MTU, ifp->if_mtu))
+		goto error;
+/*
+	if (!nla_put_u32(&wa->ns, IFLA_MIN_MTU, 60))
+		goto error;
+
+	if (!nla_put_u32(&wa->ns, IFLA_MAX_MTU, 9000))
+		goto error;
+
+	if (!nla_put_u32(&wa->ns, IFLA_GROUP, 0))
+		goto error;
+*/
+	if (!nla_put_u32(&wa->ns, IFLA_PROMISCUITY, 0))
+		goto error;
+
+	nlmsg_end(&wa->ns);
+
+	wa->dumped++;
+
+	return (true);
+
+error:
+	RT_LOG(LOG_DEBUG, "Failed to dump interface %s", ifp->if_xname);
+	nlmsg_abort(&wa->ns);
+	return (false);
+}
+
+/*
+ * Handles an RTM_GETLINK dump request: writes one NL_RTM_NEWLINK message
+ * per interface on V_ifnet, followed by an NLMSG_DONE message whose int
+ * payload carries the overall result.
+ *
+ * Returns 0 on success or ENOMEM if the writer cannot be obtained, an
+ * interface cannot be dumped, or the NLMSG_DONE message cannot be started.
+ *
+ * NOTE(review): wa.hdr is built with NLM_F_MULTI added, but the dump
+ * helper is passed the request header @hdr instead - confirm which flags
+ * the replies are meant to carry.
+ * NOTE(review): the early ENOMEM return after a failed NLMSG_DONE put
+ * does not flush or abort the writer - confirm nothing is leaked.
+ */
+int
+rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+	struct ifnet *ifp;
+	int error = 0;
+
+	struct netlink_walkargs wa = {
+		.so = nlp,
+		.rc.rc_cmd = NL_RTM_NEWLINK,
+		.hdr.nlmsg_pid = hdr->nlmsg_pid,
+		.hdr.nlmsg_seq = hdr->nlmsg_seq,
+		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
+	};
+
+	if (!nlmsg_get_socket_writer(NLMSG_LARGE, nlp, &wa.ns)) {
+		RT_LOG(LOG_DEBUG, "error allocating mbuf");
+		return (ENOMEM);
+	}
+
+	RT_LOG(LOG_DEBUG, "Start dump");
+
+	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		if (!dump_iface(&wa, ifp, hdr)) {
+			error = ENOMEM;
+			break;
+		}
+	}
+
+	RT_LOG(LOG_DEBUG, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
+
+	if (!nlmsg_put(&wa.ns, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
+		RT_LOG(LOG_DEBUG, "Unable to write message");
+		return (ENOMEM);
+	}
+	/* report operation result */
+	int *perror = nlmsg_reserve_object(&wa.ns, int);
+	RT_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, wa.ns.offset, perror);
+	*perror = error;
+	nlmsg_end(&wa.ns);
+	nlmsg_flush(&wa.ns);
+
+	return (error);
+}
+
+
+/*
+
+{ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")},
+ [
+ {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")},
+ {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")},
+ {{nla_len=7, nla_type=IFA_LABEL}, "lo"},
+ {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT},
+ {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]},
+---
+
+{{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735},
+ {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")},
+ [
+ {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")},
+ {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}},
+ {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]},
+*/
+
+/*
+ * Derives the rtnetlink address scope for @ifa from its address:
+ * RT_SCOPE_HOST for loopback addresses, RT_SCOPE_LINK for link-local
+ * addresses, RT_SCOPE_UNIVERSE for everything else (including address
+ * families other than AF_INET and AF_INET6).
+ */
+static uint8_t
+ifa_get_scope(const struct ifaddr *ifa)
+{
+	const struct sockaddr *sa;
+	uint8_t addr_scope = RT_SCOPE_UNIVERSE;
+
+	sa = ifa->ifa_addr;
+	switch (sa->sa_family) {
+	case AF_INET:
+		{
+			/*
+			 * sin_addr is stored in network byte order, while
+			 * IN_LOOPBACK()/IN_LINKLOCAL() compare against
+			 * host-order constants.
+			 */
+			in_addr_t addr;
+
+			addr = ntohl(((const struct sockaddr_in *)sa)->sin_addr.s_addr);
+			if (IN_LOOPBACK(addr))
+				addr_scope = RT_SCOPE_HOST;
+			else if (IN_LINKLOCAL(addr))
+				addr_scope = RT_SCOPE_LINK;
+			break;
+		}
+	case AF_INET6:
+		{
+			const struct in6_addr *addr;
+
+			addr = &((const struct sockaddr_in6 *)sa)->sin6_addr;
+			if (IN6_IS_ADDR_LOOPBACK(addr))
+				addr_scope = RT_SCOPE_HOST;
+			else if (IN6_IS_ADDR_LINKLOCAL(addr))
+				addr_scope = RT_SCOPE_LINK;
+			break;
+		}
+	default:
+		break;
+	}
+
+	return (addr_scope);
+}
+
+/*
+ * Returns the number of bits set in the IPv6 netmask @addr, obtained by
+ * summing the population counts of its four 32-bit words.
+ */
+static uint8_t
+inet6_get_plen(const struct in6_addr *addr)
+{
+	int nbits = 0;
+
+	for (int w = 0; w < 4; w++)
+		nbits += bitcount32(addr->s6_addr32[w]);
+
+	return (nbits);
+}
+
+/*
+ * Returns the prefix length encoded by the netmask sockaddr @sa, computed
+ * as the number of bits set in the mask.
+ *
+ * Returns 0 when @sa is NULL or its family is neither AF_INET nor
+ * AF_INET6. The NULL check must come first: the address family cannot be
+ * read from a NULL pointer.
+ * NOTE(review): the original code hinted at reporting a full-length
+ * prefix (32/128) for a missing mask; that needs the family from the
+ * caller - confirm the desired value.
+ */
+static uint8_t
+get_sa_plen(const struct sockaddr *sa)
+{
+	const struct in6_addr *paddr6;
+	const struct in_addr *paddr;
+
+	if (sa == NULL)
+		return (0);
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		paddr = &(((const struct sockaddr_in *)sa)->sin_addr);
+		return (bitcount32(paddr->s_addr));
+	case AF_INET6:
+		paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr);
+		return (inet6_get_plen(paddr6));
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Writes one NL_RTM_NEWADDR message describing address @ifa of interface
+ * @ifp into the writer in @wa, using pid, sequence number and flags from
+ * @hdr.
+ *
+ * wa->count is incremented for every call, wa->dumped only on success.
+ * Returns true on success; on an attribute write failure the partially
+ * built message is aborted and false is returned.
+ */
+static bool
+dump_iface_addr(struct netlink_walkargs *wa, struct ifnet *ifp, struct ifaddr *ifa,
+ struct nlmsghdr *hdr)
+{
+ struct ifaddrmsg *ifamsg;
+ struct sockaddr *sa = ifa->ifa_addr;
+
+ wa->count++;
+
+ RT_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s",
+ ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
+
+ /* NOTE(review): the nlmsg_put() result is not checked here. */
+ int payload_len = sizeof(struct ifaddrmsg);
+ nlmsg_put(&wa->ns, hdr->nlmsg_pid, hdr->nlmsg_seq, NL_RTM_NEWADDR,
+ hdr->nlmsg_flags, payload_len);
+
+ ifamsg = nlmsg_reserve_object(&wa->ns, struct ifaddrmsg);
+ ifamsg->ifa_family = sa->sa_family;
+ ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask);
+ ifamsg->ifa_flags = 0; // ifa_flags is useless
+ ifamsg->ifa_scope = ifa_get_scope(ifa);
+ ifamsg->ifa_index = ifp->if_index;
+
+ /*
+  * IFA_ADDRESS carries the destination address when one of the same
+  * family exists, otherwise the local address; IFA_LOCAL always
+  * carries the local address.
+  */
+ struct sockaddr *dst_sa = ifa->ifa_dstaddr;
+ if ((dst_sa == NULL) || (dst_sa->sa_family != sa->sa_family))
+ dst_sa = sa;
+ if (!dump_sa(&wa->ns, IFA_ADDRESS, dst_sa))
+ FAIL_ATTR(IFA_ADDRESS);
+ if (!dump_sa(&wa->ns, IFA_LOCAL, sa))
+ FAIL_ATTR(IFA_LOCAL);
+
+ uint32_t val = 0; // ifa->ifa_flags;
+ if (!nla_put_u32(&wa->ns, IFA_FLAGS, val))
+ FAIL_ATTR(IFA_FLAGS);
+
+ nlmsg_end(&wa->ns);
+ wa->dumped++;
+ return (true);
+error:
+ RT_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
+ rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
+ nlmsg_abort(&wa->ns);
+ return (false);
+}
+
+/*
+ * Handles an RTM_GETADDR dump request: writes one NL_RTM_NEWADDR message
+ * per non-AF_LINK address of every interface on V_ifnet, followed by an
+ * NLMSG_DONE message whose int payload carries the overall result.
+ *
+ * Returns 0 on success or ENOMEM if the writer cannot be obtained, an
+ * address cannot be dumped, or the NLMSG_DONE message cannot be started.
+ */
+int
+rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+ struct ifaddr *ifa;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /*
+  * NOTE(review): wa.family is never assigned, so the family filter in
+  * the loop below is always disabled. wa.hdr (with NLM_F_MULTI) is
+  * built but the request header is what gets passed to the helper.
+  */
+ struct netlink_walkargs wa = {
+ .so = nlp,
+ .rc.rc_cmd = NL_RTM_NEWADDR,
+ .hdr.nlmsg_pid = hdr->nlmsg_pid,
+ .hdr.nlmsg_seq = hdr->nlmsg_seq,
+ .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
+ };
+
+ if (!nlmsg_get_socket_writer(NLMSG_LARGE, nlp, &wa.ns)) {
+ RT_LOG(LOG_DEBUG, "error allocating mbuf");
+ return (ENOMEM);
+ }
+
+ RT_LOG(LOG_DEBUG, "Start dump");
+
+ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (wa.family != 0 && wa.family != ifa->ifa_addr->sa_family)
+ continue;
+ /* Link-level addresses are not reported as interface addresses here. */
+ if (ifa->ifa_addr->sa_family == AF_LINK)
+ continue;
+ if (!dump_iface_addr(&wa, ifp, ifa, hdr)) {
+ error = ENOMEM;
+ break;
+ }
+ }
+ /* Propagate the inner-loop failure out of the interface loop. */
+ if (error != 0)
+ break;
+ }
+
+ RT_LOG(LOG_DEBUG, "End dump, iterated %d dumped %d", wa.count, wa.dumped);
+
+ if (!nlmsg_put(&wa.ns, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
+ RT_LOG(LOG_DEBUG, "Unable to write message");
+ return (ENOMEM);
+ }
+ /* Report the operation result as the NLMSG_DONE payload. */
+ int *perror = nlmsg_reserve_object(&wa.ns, int);
+ RT_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, wa.ns.offset, perror);
+ *perror = error;
+ nlmsg_end(&wa.ns);
+ nlmsg_flush(&wa.ns);
+
+ return (error);
+}
Index: sys/netlink/netlink_io.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_io.c
@@ -0,0 +1,343 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/mbuf.h>
+#include <sys/ck.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+
+#define DEBUG_MOD_NAME nl_io
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+/*
+ * Source address attached to every message delivered to a socket by
+ * nl_send_one(): a netlink address with port 0.
+ */
+static struct sockaddr_nl _nl_empty_src = {
+ .nl_len = sizeof(struct sockaddr_nl),
+ .nl_family = PF_NETLINK,
+ .nl_pid = 0 /* comes from the kernel */
+};
+/* Generic sockaddr view of the address above, as taken by sbappendaddr(). */
+static struct sockaddr *nl_empty_src = (struct sockaddr *)&_nl_empty_src;
+
+static int nl_receive(struct mbuf *m, struct nlpcb *nlp);
+
+/*
+ * Queues packet @m, sent by userland on socket @so, for processing by the
+ * per-socket taskqueue.
+ *
+ * The packet is appended to the pcb's singly linked m_nextpkt queue and
+ * the processing task is scheduled unless it is already pending.
+ * Ownership of @m always passes to this function.
+ *
+ * Returns 0 on success, or EINVAL (after freeing the whole chain) if the
+ * pcb is not active.
+ */
+int
+nl_receive_async(struct mbuf *m, struct socket *so)
+{
+	struct nlpcb *nlp = sotonlpcb(so);
+
+	m->m_nextpkt = NULL;
+
+	NLP_LOCK(nlp);
+
+	if ((__predict_false(!nlp->nl_active))) {
+		NLP_UNLOCK(nlp);
+		/* m may be a chain: release all of it, not just the head. */
+		m_freem(m);
+		return (EINVAL);
+	}
+
+	/* XXX: Implement queue limits */
+	if (nlp->nl_queue_head == NULL) {
+		nlp->nl_queue_head = m;
+		nlp->nl_queue_last = m;
+	} else {
+		nlp->nl_queue_last->m_nextpkt = m;
+		nlp->nl_queue_last = m;
+	}
+	nlp->nl_queue_length += m_length(m, NULL);
+	RT_LOG(LOG_DEBUG3, "enqueue, total len %ld", nlp->nl_queue_length);
+
+	/* nl_task_pending is cleared by nl_process_received(). */
+	if (!nlp->nl_task_pending) {
+		nlp->nl_task_pending = true;
+		taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task);
+	}
+	NLP_UNLOCK(nlp);
+
+	return (0);
+}
+
+/*
+ * Detaches the whole pending packet queue from @nlp under the pcb lock,
+ * resets the queue bookkeeping, and then feeds the packets one by one,
+ * in arrival order, to nl_receive() without holding the lock.
+ */
+static void
+nl_process_received(struct nlpcb *nlp)
+{
+	struct mbuf *pkt, *following;
+
+	NLP_LOCK(nlp);
+	pkt = nlp->nl_queue_head;
+	nlp->nl_queue_head = NULL;
+	nlp->nl_queue_last = NULL;
+	nlp->nl_queue_length = 0;
+	nlp->nl_task_pending = false;
+	NLP_UNLOCK(nlp);
+
+	RT_LOG(LOG_DEBUG2, "taskqueue called");
+
+	for (; pkt != NULL; pkt = following) {
+		/* Unlink before handing over: nl_receive() consumes pkt. */
+		following = pkt->m_nextpkt;
+		pkt->m_nextpkt = NULL;
+		nl_receive(pkt, nlp);
+	}
+}
+
+/*
+ * Taskqueue callback registered in nl_pru_attach(); @_arg is the pcb.
+ * Processes the pcb's queued packets with the socket's VNET set as the
+ * current one and inside the network epoch. @pending is unused.
+ */
+void
+nl_taskqueue_handler(void *_arg, int pending)
+{
+ struct nlpcb *nlp = (struct nlpcb *)_arg;
+ struct epoch_tracker et;
+
+ CURVNET_SET(nlp->nl_socket->so_vnet);
+ NET_EPOCH_ENTER(et);
+ nl_process_received(nlp);
+ NET_EPOCH_EXIT(et);
+ CURVNET_RESTORE();
+}
+
+/*
+ * Delivers kernel-originated message @m to the receive buffer of the
+ * socket owning @nlp and wakes up readers.
+ *
+ * Ownership of @m always passes to this function: it is either appended
+ * to the socket buffer or freed (on receive buffer overflow, or when the
+ * pcb has already been detached from its socket).
+ * Returns true if the message was queued, false otherwise.
+ */
+bool
+nl_send_one(struct mbuf *m, struct nlpcb *nlp)
+{
+#if DEBUG_MAX_LEVEL > LOG_DEBUG2
+	struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
+	RT_LOG(LOG_DEBUG2, "TX mbuf len %u msg type %d first hdrlen %u",
+	    m->m_len, hdr->nlmsg_type, hdr->nlmsg_len);
+#endif
+	bool result = false;
+	NLP_LOCK(nlp);
+	if (nlp->nl_socket != NULL) {
+		struct socket *so = nlp->nl_socket;
+
+		if (sbappendaddr(&so->so_rcv, nl_empty_src, m, NULL) != 0) {
+			sorwakeup(so);
+			RT_LOG(LOG_DEBUG3, "TX done");
+			result = true;
+		} else {
+			soroverflow(so);
+			m_freem(m);
+			RT_LOG(LOG_DEBUG, "socket RX overflow for PID %u",
+			    nlp->nl_process_id);
+		}
+	} else {
+		/* Socket already detached: nobody else will release m. */
+		m_freem(m);
+	}
+	NLP_UNLOCK(nlp);
+
+	return (result);
+}
+
+/*
+ * Used when certain data needs to be broadcasted to the group.
+ *
+ * Delivers @m to every pcb on the per-VNET pcb list whose group mask
+ * intersects @groups_mask. All matching sockets except the last one
+ * receive a copy made with m_copym(); the last one receives @m itself.
+ * If no socket matches, @m is freed. The list is walked under the CTL
+ * read lock.
+ */
+void
+nl_send_group(struct mbuf *m, uint32_t groups_mask)
+{
+ struct nlpcb *nlp_last = NULL;
+ struct nlpcb *nlp;
+ CTL_TRACKER;
+
+#if DEBUG_MAX_LEVEL > LOG_DEBUG2
+ struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
+ RT_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to groups 0x%X",
+ m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, groups_mask);
+#endif
+
+ CTL_RLOCK();
+
+ CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_pcb_head, nl_next) {
+ if (nlp->nl_groups & groups_mask) {
+ /*
+  * Delivery lags one match behind, so the final match can
+  * take the original mbuf without an extra copy.
+  */
+ if (nlp_last != NULL) {
+ struct mbuf *m_copy;
+ m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
+ if (m_copy != NULL)
+ nl_send_one(m_copy, nlp_last);
+ else {
+ /* Copy failed: nothing is delivered, the socket is only woken up. */
+ NLP_LOCK(nlp_last);
+ if (nlp_last->nl_socket != NULL)
+ sorwakeup(nlp_last->nl_socket);
+ NLP_UNLOCK(nlp_last);
+ }
+ }
+ nlp_last = nlp;
+ }
+ }
+ if (nlp_last != NULL)
+ nl_send_one(m, nlp_last);
+ else
+ m_freem(m);
+
+ CTL_RUNLOCK();
+}
+
+/*
+ * Sends an ack message
+ *
+ * Builds an NLMSG_ERROR reply for request @nlmsg carrying @error and
+ * sends it to the socket of @nlp. The reply echoes the request header
+ * only, unless @error is non-zero and NLF_CAP_ACK is not set on the
+ * socket, in which case the whole request is echoed. If the writer
+ * cannot be obtained the ack is dropped after logging.
+ */
+void
+nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg)
+{
+ struct nlmsgerr *errmsg;
+ int payload_len;
+ uint32_t flags = nlp->nl_flags;
+ struct nlmsg_state ns;
+ bool cap_ack;
+
+ payload_len = sizeof(struct nlmsgerr);
+
+ /*
+ * The only case when we send the full message in the
+ * reply is when there is an error and NETLINK_CAP_ACK
+ * is not set.
+ */
+ cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
+ if (!cap_ack)
+ payload_len += nlmsg->nlmsg_len - sizeof(struct nlmsghdr);
+
+ /*
+ * TODO: handle NETLINK_F_EXT_ACK sockopt
+ * TODO: handle cookies
+ */
+
+ int sz = payload_len + sizeof(struct nlmsghdr);
+ if (!nlmsg_get_socket_writer(sz, nlp, &ns)) {
+ RT_LOG(LOG_NOTICE, "error allocating nlmsg(%d)", sz);
+ return;
+ }
+
+ RT_LOG(LOG_DEBUG, "type-%d;payload-%d;pid-%d;seq-%d", NLMSG_ERROR, payload_len,
+ nlp->nl_port, nlmsg->nlmsg_seq);
+
+ /* NOTE(review): results of nlmsg_put()/nlmsg_reserve_data() are not checked. */
+ nlmsg_put(&ns, nlp->nl_port, nlmsg->nlmsg_seq, NLMSG_ERROR, 0, payload_len);
+
+ errmsg = nlmsg_reserve_data(&ns, payload_len, struct nlmsgerr);
+ errmsg->error = error;
+ /* In case of error copy the whole message, else just the header */
+ memcpy(&errmsg->msg, nlmsg, cap_ack ? sizeof(*nlmsg) : nlmsg->nlmsg_len);
+
+ nlmsg_end(&ns);
+ nlmsg_flush(&ns);
+}
+
+/*
+ * Validates and dispatches a single netlink message.
+ *
+ * @hdr points at the message inside the receive buffer and
+ * @remaining_length is the number of bytes left in that buffer starting
+ * at @hdr. Returns EINVAL if nlmsg_len exceeds the remaining data or is
+ * shorter than the header itself; otherwise returns 0. A handler error
+ * is not returned: it is reported to the sender through nlmsg_ack().
+ */
+static int
+nl_receive_message(struct nlmsghdr *hdr, int remaining_length,
+ struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+ /* Handler is selected by the protocol the socket was created with. */
+ nl_handler handler = nl_handlers[nlp->nl_proto];
+ int error = 0;
+
+ RT_LOG(LOG_DEBUG3, "msg len: %d type: %d", hdr->nlmsg_len, hdr->nlmsg_type);
+
+ if (__predict_false(hdr->nlmsg_len > remaining_length)) {
+ RT_LOG(LOG_DEBUG, "invalid message");
+ return (EINVAL);
+ } else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) {
+ RT_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len);
+ return (EINVAL);
+ }
+ /* Stamp each message with sender pid */
+ hdr->nlmsg_pid = nlp->nl_port;
+
+ /* Only requests with a non-control message type reach the handler. */
+ if (hdr->nlmsg_flags & NLM_F_REQUEST && hdr->nlmsg_type >= NLMSG_MIN_TYPE) {
+ RT_LOG(LOG_DEBUG2, "handling message with msg type: %d",
+ hdr->nlmsg_type);
+
+ /* Sockets created by Linux ABI processes get their messages translated first. */
+ struct nlmsghdr *thdr = hdr;
+ if (nlp->nl_linux)
+ thdr = nlmsg_from_linux(hdr, npt);
+ error = handler(thdr, npt);
+ RT_LOG(LOG_DEBUG2, "retcode: %d", error);
+ }
+ /* Ack when requested, and on any handler error except EINTR. */
+ if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) {
+ RT_LOG(LOG_DEBUG3, "ack");
+ nlmsg_ack(nlp, error, hdr);
+ RT_LOG(LOG_DEBUG3, "done");
+ }
+
+ return (0);
+}
+
+/*
+ * Processes an incoming packet, which can contain multiple netlink messages
+ *
+ * The mbuf chain @m is copied into a single zeroed buffer and freed. The
+ * buffer is followed by scratch space that is handed to the handlers
+ * through the parse tracker. Messages are then processed in order until
+ * the data is exhausted or one of them fails validation.
+ * Returns 0, ENOMEM if the buffer cannot be allocated, or the error from
+ * nl_receive_message().
+ */
+static int
+nl_receive(struct mbuf *m, struct nlpcb *nlp)
+{
+ int offset, buffer_length, error = 0;
+ struct nlmsghdr *hdr;
+ char *buffer;
+
+ RT_LOG(LOG_DEBUG, "RX netlink mbuf %p on %p", m, nlp->nl_socket);
+
+ /*
+  * Layout: [packet data rounded up to 8][scratch]. Sockets of Linux ABI
+  * processes get an additional data-sized area in the scratch part.
+  */
+ int data_length = m_length(m, NULL);
+ buffer_length = roundup2(data_length, 8) + SCRATCH_BUFFER_SIZE;
+ if (nlp->nl_linux)
+ buffer_length += roundup2(data_length, 8);
+ buffer = malloc(buffer_length, M_NETLINK, M_NOWAIT | M_ZERO);
+ if (buffer == NULL) {
+ m_freem(m);
+ RT_LOG(LOG_DEBUG, "Unable to allocate %d bytes of memory",
+ buffer_length);
+ return (ENOMEM);
+ }
+ m_copydata(m, 0, data_length, buffer);
+ m_freem(m); // XXX: reuse for ack?
+
+ struct netlink_parse_tracker npt = {
+ .nlp = nlp,
+ .lb.base = &buffer[roundup2(data_length, 8)],
+ .lb.size = buffer_length - roundup2(data_length, 8),
+ };
+
+ /* Continue only while a complete message header still fits. */
+ for (offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
+ hdr = (struct nlmsghdr *)&buffer[offset];
+ /* Save length prior to calling handler */
+ int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
+ RT_LOG(LOG_DEBUG2, "parsing offset %d/%d", offset, data_length);
+ /* Update parse state */
+ npt.hdr = hdr;
+ lb_clear(&npt.lb);
+ error = nl_receive_message(hdr, data_length - offset, nlp, &npt);
+ if (__predict_false(error != 0))
+ break;
+ offset += msglen;
+ }
+ RT_LOG(LOG_DEBUG2, "packet parsing done");
+
+ free(buffer, M_NETLINK);
+ return (error);
+}
Index: sys/netlink/netlink_linux.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_linux.c
@@ -0,0 +1,451 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/ck.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+#include <netlink/netlink_route.h>
+
+#include <compat/linux/linux.h>
+#include <compat/linux/linux_common.h>
+
+#define DEBUG_MOD_NAME nl_linux
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+/*
+ * Maps a Linux address family number to the native one.
+ * Only the families tested below are translated; anything else yields -1.
+ */
+static int
+_linux_to_bsd_domain(int domain)
+{
+	int native = -1;
+
+	if (domain == LINUX_AF_UNSPEC)
+		native = AF_UNSPEC;
+	else if (domain == LINUX_AF_UNIX)
+		native = AF_LOCAL;
+	else if (domain == LINUX_AF_INET)
+		native = AF_INET;
+	else if (domain == LINUX_AF_INET6)
+		native = AF_INET6;
+
+	return (native);
+}
+
+/*
+ * Maps a native address family number to the Linux one.
+ * Only the families tested below are translated; anything else yields -1.
+ */
+static int
+_bsd_to_linux_domain(int domain)
+{
+	int linux_domain = -1;
+
+	if (domain == AF_UNSPEC)
+		linux_domain = LINUX_AF_UNSPEC;
+	else if (domain == AF_LOCAL)
+		linux_domain = LINUX_AF_UNIX;
+	else if (domain == AF_INET)
+		linux_domain = LINUX_AF_INET;
+	else if (domain == AF_INET6)
+		linux_domain = LINUX_AF_INET6;
+
+	return (linux_domain);
+}
+
+/* Tells whether the payload of @rta is exactly @sz bytes long. */
+static bool
+valid_rta_size(const struct rtattr *rta, int sz)
+{
+	if (NL_RTA_DATA_LEN(rta) != sz)
+		return (false);
+	return (true);
+}
+
+/* Tells whether @rta carries a payload the size of a uint32_t. */
+static bool
+valid_rta_u32(const struct rtattr *rta)
+{
+	const int want = sizeof(uint32_t);
+
+	return (valid_rta_size(rta, want));
+}
+
+/*
+ * Reads the payload of @rta as a uint32_t.
+ * The payload size is not checked here; see valid_rta_u32().
+ */
+static uint32_t
+nl_rta_get_uint32(const struct rtattr *rta)
+{
+	const uint32_t *valp = (const uint32_t *)NL_RTA_DATA_CONST(rta);
+
+	return (*valp);
+}
+
+/*
+ * Rewrites a Linux route request in place so that it can be handed to the
+ * native handlers: the address family in struct rtmsg is converted and
+ * table 254 is mapped to table 0.  When an NL_RTA_TABLE attribute is
+ * present, rtm_table is cleared and an attribute value of 254 is rewritten
+ * to 0.  Attribute processing stops at the first attribute whose length is
+ * smaller than its own header or whose NL_RTA_TABLE payload is not 32 bits.
+ *
+ * Messages too short to contain struct rtmsg are returned untouched.
+ * Always returns @hdr; @npt is not used.
+ */
+static struct nlmsghdr *
+rtnl_route_from_linux(struct nlmsghdr *hdr, struct netlink_parse_tracker *npt)
+{
+	/* Tweak address families and default fib only */
+	struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
+	struct nlattr *nla, *nla_head;
+	int attrs_len;
+
+	/*
+	 * Writing to @rtm when the message does not actually contain it
+	 * would modify bytes that lie past the end of this message.
+	 */
+	if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg))
+		return (hdr);
+
+	rtm->rtm_family = _linux_to_bsd_domain(rtm->rtm_family);
+
+	if (rtm->rtm_table == 254)
+		rtm->rtm_table = 0;
+
+	attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
+	attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
+	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
+
+	NLA_FOREACH(nla, nla_head, attrs_len) {
+		RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
+		    nla->nla_type, nla->nla_len, attrs_len);
+		struct rtattr *rta = (struct rtattr *)nla;
+		/* A too-short attribute would never advance the iterator. */
+		if (rta->rta_len < sizeof(struct rtattr)) {
+			break;
+		}
+		switch (rta->rta_type) {
+		case NL_RTA_TABLE: {
+			if (!valid_rta_u32(rta))
+				goto done;
+			rtm->rtm_table = 0;
+			uint32_t fibnum = nl_rta_get_uint32(rta);
+			RT_LOG(LOG_DEBUG, "GET RTABLE: %u", fibnum);
+			if (fibnum == 254) {
+				*((uint32_t *)NL_RTA_DATA(rta)) = 0;
+			}
+			break;
+		}
+		default:
+			/* Other attributes are passed through unchanged. */
+			break;
+		}
+	}
+
+done:
+	return (hdr);
+}
+
+/*
+ * Converts a Linux NETLINK_ROUTE request to the native representation.
+ * Only the route get/new/delete messages are rewritten; every other
+ * message type is returned as is.
+ */
+static struct nlmsghdr *
+rtnl_from_linux(struct nlmsghdr *hdr, struct netlink_parse_tracker *npt)
+{
+	const int msg_type = hdr->nlmsg_type;
+
+	if (msg_type == NL_RTM_GETROUTE || msg_type == NL_RTM_NEWROUTE ||
+	    msg_type == NL_RTM_DELROUTE)
+		return (rtnl_route_from_linux(hdr, npt));
+
+	return (hdr);
+}
+
+/*
+ * Entry point for translating a message received from a Linux socket.
+ * Dispatches on the netlink protocol of the socket recorded in @npt;
+ * protocols without a translator get the message back unchanged.
+ */
+struct nlmsghdr *
+nlmsg_from_linux(struct nlmsghdr *hdr, struct netlink_parse_tracker *npt)
+{
+	struct nlpcb *nlp = npt->nlp;
+
+	if (nlp->nl_proto == NETLINK_ROUTE)
+		return (rtnl_from_linux(hdr, npt));
+
+	return (hdr);
+}
+
+
+/************************************************************
+ * Kernel -> Linux
+ ************************************************************/
+
+/*
+ * Copies the message @hdr to the writer @ns without any translation.
+ * Returns false if the writer could not provide space for it.
+ */
+static bool
+handle_default_out(struct nlmsghdr *hdr, struct nlmsg_state *ns)
+{
+	char *dst = nlmsg_reserve_data(ns, NLMSG_ALIGN(hdr->nlmsg_len), char);
+
+	if (dst == NULL)
+		return (false);
+
+	memcpy(dst, hdr, hdr->nlmsg_len);
+	return (true);
+}
+
+/*
+ * Starts a new message in @ns that carries the same pid, sequence number,
+ * type and flags as @hdr.  The length passed to nlmsg_put() is 0.
+ */
+static bool
+nlmsg_copy_header(struct nlmsghdr *hdr, struct nlmsg_state *ns)
+{
+	bool started;
+
+	started = nlmsg_put(ns, hdr->nlmsg_pid, hdr->nlmsg_seq,
+	    hdr->nlmsg_type, hdr->nlmsg_flags, 0);
+	return (started);
+}
+
+/*
+ * Reserves @sz bytes in @ns and fills them with the data that immediately
+ * follows the netlink header @hdr (NLMSG_ALIGN(@sz) bytes are copied).
+ *
+ * Returns a pointer to the copy, or NULL if no space could be reserved.
+ */
+static void *
+_nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nlmsg_state *ns, int sz)
+{
+	void *next_hdr = nlmsg_reserve_data(ns, sz, void);
+
+	/* The reservation fails when the writer cannot get more storage. */
+	if (next_hdr == NULL)
+		return (NULL);
+	memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
+
+	return (next_hdr);
+}
+/* Typed wrapper: copies the object of type _t that follows _hdr. */
+#define nlmsg_copy_next_header(_hdr, _ns, _t) \
+	((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
+
+/*
+ * Appends a verbatim copy of the attribute @nla_orig (nla_len bytes) to @ns.
+ * Returns false if the writer could not provide space for it.
+ */
+static bool
+nlmsg_copy_nla(const struct nlattr *nla_orig, struct nlmsg_state *ns)
+{
+	struct nlattr *copy;
+
+	copy = nlmsg_reserve_data(ns, nla_orig->nla_len, struct nlattr);
+	if (copy == NULL)
+		return (false);
+
+	memcpy(copy, nla_orig, nla_orig->nla_len);
+	return (true);
+}
+
+/*
+ * Copies every attribute of the message @hdr to @ns unchanged.
+ * @raw_hdrlen is the unaligned size of the family-specific header that
+ * sits between the netlink header and the first attribute.
+ * Returns false as soon as one attribute cannot be copied.
+ */
+static bool
+nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nlmsg_state *ns)
+{
+	int hdrlen = NETLINK_ALIGN(raw_hdrlen);
+	char *payload = (char *)(hdr + 1);
+	struct nlattr *first = (struct nlattr *)(payload + hdrlen);
+	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
+	struct nlattr *cur;
+
+	NLA_FOREACH(cur, first, attrs_len) {
+		if (!nlmsg_copy_nla(cur, ns))
+			return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * Translates native IFF_* interface flags to the Linux numbering.
+ *
+ * The flags collected in the mask below occupy the same bit on both
+ * systems and are passed through.  IFF_DRV_RUNNING is part of that set:
+ * it uses bit 0x40, which is IFF_RUNNING on Linux.  IFF_MULTICAST is moved
+ * to bit 12, its Linux position.
+ *
+ * Flags without a Linux analogue are dropped: IFF_KNOWSEPOCH,
+ * IFF_DRV_OACTIVE, IFF_SIMPLEX, IFF_LINK0..2, IFF_CANTCONFIG,
+ * IFF_PPROMISC, IFF_MONITOR, IFF_STATICARP, IFF_STICKYARP, IFF_DYING,
+ * IFF_RENAMING and IFF_NOGROUP, as well as any bit not named here.
+ */
+static unsigned int
+rtnl_if_flags_to_linux(unsigned int if_flags)
+{
+	const unsigned int same_bit_flags = IFF_UP | IFF_BROADCAST |
+	    IFF_DEBUG | IFF_LOOPBACK | IFF_POINTOPOINT | IFF_DRV_RUNNING |
+	    IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI;
+	unsigned int result = if_flags & same_bit_flags;
+
+	if ((if_flags & IFF_MULTICAST) != 0)
+		result |= 1U << 12;	/* Linux IFF_MULTICAST */
+
+	return (result);
+}
+
+/*
+ * Re-emits an NL_RTM_NEWLINK message into @ns in the form Linux userland
+ * expects: the address family, the interface type (only IFT_ETHER is
+ * mapped, to 1) and the interface flags of struct ifinfomsg are converted,
+ * the attributes are copied unchanged, and IFLA_QDISC / IFLA_TXQLEN
+ * attributes with fixed values are appended.
+ *
+ * Returns false if the writer ran out of space at any step; the message
+ * is then left unfinished in @ns.  @nlp is not used.
+ */
+static bool
+rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
+    struct nlmsg_state *ns)
+{
+	if (!nlmsg_copy_header(hdr, ns))
+		return (false);
+
+	struct ifinfomsg *ifinfo;
+	ifinfo = nlmsg_copy_next_header(hdr, ns, struct ifinfomsg);
+	/* Bail out if no space could be reserved for the header copy. */
+	if (ifinfo == NULL)
+		return (false);
+
+	ifinfo->ifi_family = _bsd_to_linux_domain(ifinfo->ifi_family);
+	/* Convert interface type */
+	switch (ifinfo->ifi_type) {
+	case IFT_ETHER:
+		ifinfo->ifi_type = 1; // ARPHRD_ETHER
+		break;
+	}
+	ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
+
+	/* Copy attributes unchanged */
+	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), ns))
+		return (false);
+
+	/* make ip(8) happy */
+	if (!nla_put_string(ns, IFLA_QDISC, "noqueue"))
+		return (false);
+
+	if (!nla_put_u32(ns, IFLA_TXQLEN, 1000))
+		return (false);
+
+	nlmsg_end(ns);
+	RT_LOG(LOG_DEBUG2, "done processing ns %p", ns);
+	return (true);
+}
+
+/*
+ * Re-emits an NL_RTM_NEWADDR message into @ns for Linux userland: the
+ * address family of struct ifaddrmsg is converted and the attributes are
+ * copied unchanged.
+ *
+ * Returns false if the writer ran out of space at any step; the message
+ * is then left unfinished in @ns.  @nlp is not used.
+ */
+static bool
+rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
+    struct nlmsg_state *ns)
+{
+	if (!nlmsg_copy_header(hdr, ns))
+		return (false);
+
+	struct ifaddrmsg *ifamsg;
+	ifamsg = nlmsg_copy_next_header(hdr, ns, struct ifaddrmsg);
+	/* Bail out if no space could be reserved for the header copy. */
+	if (ifamsg == NULL)
+		return (false);
+
+	ifamsg->ifa_family = _bsd_to_linux_domain(ifamsg->ifa_family);
+	/* XXX: fake ifa_flags? */
+
+	/* Copy attributes unchanged */
+	if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), ns))
+		return (false);
+
+	nlmsg_end(ns);
+	RT_LOG(LOG_DEBUG2, "done processing ns %p", ns);
+	return (true);
+}
+
+/*
+ * Re-emits an NL_RTM_NEWROUTE message into @ns for Linux userland: the
+ * address family of struct rtmsg is converted and an NL_RTA_TABLE value
+ * of 0 is reported as table 254.  All other attributes are copied
+ * unchanged.
+ *
+ * Returns false if the writer ran out of space at any step; the message
+ * is then left unfinished in @ns.  @nlp is not used.
+ */
+static bool
+rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
+    struct nlmsg_state *ns)
+{
+	if (!nlmsg_copy_header(hdr, ns))
+		return (false);
+
+	struct rtmsg *rtm;
+	rtm = nlmsg_copy_next_header(hdr, ns, struct rtmsg);
+	/* Bail out if no space could be reserved for the header copy. */
+	if (rtm == NULL)
+		return (false);
+	rtm->rtm_family = _bsd_to_linux_domain(rtm->rtm_family);
+
+	struct nlattr *nla;
+
+	int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
+	int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
+	struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
+
+	NLA_FOREACH(nla, nla_head, attrs_len) {
+		struct rtattr *rta = (struct rtattr *)nla;
+
+		switch (rta->rta_type) {
+		case NL_RTA_TABLE:
+			{
+				uint32_t fibnum;
+				fibnum = nl_rta_get_uint32(rta);
+				/* Inverse of the 254 -> 0 mapping on input. */
+				if (fibnum == 0)
+					fibnum = 254;
+				RT_LOG(LOG_DEBUG3, "FIBNUM %u", fibnum);
+				if (!nla_put_u32(ns, NL_RTA_TABLE, fibnum))
+					return (false);
+			}
+			break;
+		default:
+			if (!nlmsg_copy_nla(nla, ns))
+				return (false);
+			break;
+		}
+	}
+
+	nlmsg_end(ns);
+	RT_LOG(LOG_DEBUG2, "done processing ns %p", ns);
+	return (true);
+}
+
+/*
+ * Translates one outgoing NETLINK_ROUTE message for a Linux socket.
+ * New-link, new-address and new-route messages have dedicated
+ * translators; every other type is copied verbatim.
+ */
+static bool
+rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nlmsg_state *ns)
+{
+	const int msg_type = hdr->nlmsg_type;
+
+	RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
+
+	if (msg_type == NL_RTM_NEWLINK)
+		return (rtnl_newlink_to_linux(hdr, nlp, ns));
+	if (msg_type == NL_RTM_NEWADDR)
+		return (rtnl_newaddr_to_linux(hdr, nlp, ns));
+	if (msg_type == NL_RTM_NEWROUTE)
+		return (rtnl_newroute_to_linux(hdr, nlp, ns));
+
+	return (handle_default_out(hdr, ns));
+}
+
+/*
+ * Translates one outgoing message for a Linux socket, dispatching on the
+ * netlink protocol of @nlp.  Protocols without a translator have their
+ * messages copied verbatim.
+ */
+static bool
+nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nlmsg_state *ns)
+{
+	if (nlp->nl_proto == NETLINK_ROUTE)
+		return (rtnl_to_linux(hdr, nlp, ns));
+
+	return (handle_default_out(hdr, ns));
+}
+
+/*
+ * Translates a buffer of native netlink messages into the form expected
+ * by Linux userland.
+ *
+ * @buf holds @data_length bytes of back-to-back messages, @nlp selects
+ * the protocol-specific translator and @lb is cleared before each message
+ * is translated.
+ *
+ * Returns an mbuf chain with the translated messages, or NULL if the
+ * chain writer could not be set up or any message failed to translate.
+ * On failure all storage obtained here is released.
+ */
+struct mbuf *
+nlmsgs_to_linux(char *buf, int data_length, struct nlpcb *nlp,
+    struct linear_buffer *lb)
+{
+	RT_LOG(LOG_DEBUG, "LINUX: get %p size %d", buf, data_length);
+	struct nlmsg_state ns = {};
+
+	struct mbuf *m = NULL;
+	if (!nlmsg_get_chain_writer(data_length, &m, &ns)) {
+		RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
+		    data_length);
+		return (NULL);
+	}
+
+	/* Assume correct headers. Buffer IS mutable */
+	int count = 0;
+	for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
+		struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
+		int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
+		count++;
+
+		lb_clear(lb);
+		if (!nlmsg_to_linux(hdr, nlp, &ns)) {
+			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
+			    hdr->nlmsg_type);
+			/*
+			 * Release the writer's unflushed storage as well as
+			 * the part of the chain that was already flushed.
+			 */
+			nlmsg_free(&ns);
+			m_freem(m);
+			return (NULL);
+		}
+		offset += msglen;
+	}
+	nlmsg_flush(&ns);
+	nlmsg_free(&ns);
+	RT_LOG(LOG_DEBUG2, "Processed %d messages, chain size %d", count, m ? m_length(m, NULL) : 0);
+
+	return (m);
+}
+
+
+
Index: sys/netlink/netlink_message.h
===================================================================
--- /dev/null
+++ sys/netlink/netlink_message.h
@@ -0,0 +1,194 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#ifndef _NETLINK_NETLINK_MESSAGE_H_
+#define _NETLINK_NETLINK_MESSAGE_H_
+
+
+#ifdef _KERNEL
+
+/* Rounds x up to a multiple of y; y is used as a power-of-two mask. */
+#define _roundup2(x, y) (((x)+((y)-1))&(~((y)-1)))
+
+/* Alignment unit applied to message payloads. */
+#define NETLINK_ALIGN_SIZE sizeof(uint32_t)
+#define NETLINK_ALIGN(_len) _roundup2(_len, NETLINK_ALIGN_SIZE)
+
+/* Alignment unit applied to attributes. */
+#define NLA_ALIGN_SIZE sizeof(uint32_t)
+#define NLA_ALIGN(_len) _roundup2(_len, NLA_ALIGN_SIZE)
+
+/*
+ * NLA_NEXT yields the attribute that follows _attr, using the aligned
+ * nla_len of _attr.  NLA_FOREACH visits the attributes stored in the _len
+ * bytes starting at _start; an attribute is visited only if it ends within
+ * those bytes.  Note that an attribute with nla_len == 0 never advances
+ * the iterator, so callers handling untrusted data must check for it.
+ */
+#define NLA_NEXT(_attr) \
+	((struct nlattr *)((char *)(_attr) + NLA_ALIGN((_attr)->nla_len)))
+#define NLA_FOREACH(_attr, _start, _len) \
+	for ((_attr) = (_start); \
+	    ((char *)NLA_NEXT(_attr) - (char *)(_start)) <= (_len); \
+	    (_attr) = NLA_NEXT(_attr))
+
+struct mbuf;
+struct nlmsg_state;
+typedef bool nlmsg_state_cb(struct nlmsg_state *ns, char *buf, int buflen);
+
+/*
+ * State of a netlink message writer: a contiguous storage area that
+ * messages are appended to, plus the callback used to flush that storage.
+ */
+struct nlmsg_state {
+ /* Number of bytes available in data */
+ int alloc_len;
+ /* Number of bytes of data used so far */
+ int offset;
+ /* Header of the message being built, NULL when none is in progress */
+ struct nlmsghdr *hdr;
+ char *data; // pointer to contig storage
+ /* Object owning the storage; which member is valid depends on writer_type */
+ union {
+ struct mbuf *_m;
+ char *_buf;
+ };
+ /* Flush callback, picked from writer_type and writer_target */
+ nlmsg_state_cb *cb;
+ /* Callback argument: socket pcb, group mask or mbuf chain head pointer */
+ void *arg;
+ int malloc_flag; // M_WAITOK | M_NOWAIT
+ /* One of NS_WRITER_TYPE_* */
+ uint8_t writer_type;
+ /* One of NS_WRITER_TARGET_* */
+ uint8_t writer_target;
+};
+#define NS_WRITER_TARGET_SOCKET 0
+#define NS_WRITER_TARGET_GROUP 1
+#define NS_WRITER_TARGET_CHAIN 2
+
+#define NS_WRITER_TYPE_MBUF 0
+#define NS_WRITER_TYPE_BUF 1
+#define NS_WRITER_TYPE_LBUF 2
+#define NS_WRITER_TYPE_MBUFC 3
+
+
+#define NLMSG_SMALL 128
+#define NLMSG_LARGE 2048
+
+struct nlpcb;
+bool nlmsg_get_socket_writer(int size, struct nlpcb *nlp, struct nlmsg_state *ns);
+bool nlmsg_get_group_writer(int size, uint32_t group_mask, struct nlmsg_state *ns);
+bool nlmsg_get_chain_writer(int size, struct mbuf **pm, struct nlmsg_state *ns);
+void nlmsg_free(struct nlmsg_state *ns);
+bool nlmsg_put(struct nlmsg_state *ns, uint32_t portid, uint32_t seq, uint16_t type,
+ uint16_t flags, uint32_t len);
+void *nlmsg_reserve_data_raw(struct nlmsg_state *ns, size_t sz);
+void nlmsg_end(struct nlmsg_state *ns);
+void nlmsg_abort(struct nlmsg_state *ns);
+bool nlmsg_flush(struct nlmsg_state *ns);
+
+#define nlmsg_data(_hdr) ((void *)((_hdr) + 1))
+
+#define nlmsg_reserve_object(_ns, _t) ((_t *)nlmsg_reserve_data_raw(_ns, NLA_ALIGN(sizeof(_t))))
+#define nlmsg_reserve_data(_ns, _sz, _t) ((_t *)nlmsg_reserve_data_raw(_ns, _sz))
+
+/* Attributes */
+bool nla_put_handle_oom(struct nlmsg_state *ns, int attr_type, int attr_len,
+ const void *data);
+
+/*
+ * Appends an attribute of type @attr_type with @attr_len bytes of payload
+ * taken from @data at the current write offset of @ns.
+ *
+ * No space check is done: the caller must have verified that the aligned
+ * attribute fits.  The alignment padding after the payload is zeroed so
+ * that stale storage contents never become part of the message.
+ * Always returns true.
+ */
+static inline bool
+nla_put_noerror(struct nlmsg_state *ns, int attr_type, int attr_len,
+    const void *data)
+{
+	int raw_len = attr_len + sizeof(struct nlattr);
+	int required_len = NLA_ALIGN(raw_len);
+	struct nlattr *nla = (struct nlattr *)(&ns->data[ns->offset]);
+
+	nla->nla_len = raw_len;
+	nla->nla_type = attr_type;
+	if (attr_len > 0) {
+		memcpy((nla + 1), data, attr_len);
+		/* Not every writer hands out zero-filled storage. */
+		if (required_len > raw_len)
+			memset((char *)nla + raw_len, 0, required_len - raw_len);
+	}
+	ns->offset += required_len;
+	return (true);
+}
+
+/*
+ * Appends an attribute of type @attr_type with @attr_len bytes of payload
+ * taken from @data to @ns, obtaining more storage when needed.
+ *
+ * Returns false if the writer cannot provide enough room.  Note that
+ * obtaining more storage may invalidate pointers into the old storage.
+ */
+static inline bool
+nla_put(struct nlmsg_state *ns, int attr_type, int attr_len, const void *data)
+{
+	int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+
+	/*
+	 * One round of reclaiming does not guarantee that the attribute
+	 * fits, so re-check after every round.  Give up if a round did not
+	 * yield any additional room.
+	 */
+	while (__predict_false(ns->offset + required_len > ns->alloc_len)) {
+		int room = ns->alloc_len - ns->offset;
+
+		if (!nla_put_handle_oom(ns, attr_type, attr_len, data))
+			return (false);
+		if (ns->alloc_len - ns->offset <= room)
+			return (false);
+	}
+
+	return (nla_put_noerror(ns, attr_type, attr_len, data));
+}
+
+/*
+ * Typed convenience wrappers around nla_put().  Each one appends a single
+ * attribute of type @attrtype whose payload is the given value, copied
+ * as-is from memory, and returns the result of nla_put().
+ */
+static inline bool
+nla_put_u8(struct nlmsg_state *ns, int attrtype, uint8_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(uint8_t), &value));
+}
+
+static inline bool
+nla_put_u16(struct nlmsg_state *ns, int attrtype, uint16_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(uint16_t), &value));
+}
+
+static inline bool
+nla_put_u32(struct nlmsg_state *ns, int attrtype, uint32_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(uint32_t), &value));
+}
+
+static inline bool
+nla_put_u64(struct nlmsg_state *ns, int attrtype, uint64_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(uint64_t), &value));
+}
+
+static inline bool
+nla_put_s8(struct nlmsg_state *ns, int attrtype, int8_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(int8_t), &value));
+}
+
+static inline bool
+nla_put_s16(struct nlmsg_state *ns, int attrtype, int16_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(int16_t), &value));
+}
+
+static inline bool
+nla_put_s32(struct nlmsg_state *ns, int attrtype, int32_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(int32_t), &value));
+}
+
+static inline bool
+nla_put_s64(struct nlmsg_state *ns, int attrtype, int64_t value)
+{
+ return (nla_put(ns, attrtype, sizeof(int64_t), &value));
+}
+
+/* Appends an attribute that has no payload at all. */
+static inline bool
+nla_put_flag(struct nlmsg_state *ns, int attrtype)
+{
+ return (nla_put(ns, attrtype, 0, NULL));
+}
+
+/* Appends @str including its terminating NUL byte. */
+static inline bool
+nla_put_string(struct nlmsg_state *ns, int attrtype, const char *str)
+{
+ return (nla_put(ns, attrtype, strlen(str) + 1, str));
+}
+
+
+#endif
+
+
+#endif
Index: sys/netlink/netlink_message.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_message.c
@@ -0,0 +1,582 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/mbuf.h>
+#include <sys/ck.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+
+#define DEBUG_MOD_NAME nl_message
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+/* Allocates storage of the requested size and initializes the writer state */
+typedef bool nlwriter_op_init(struct nlmsg_state *ns, int size, bool waitok);
+/* Flushes buflen bytes held by the storage object buf; buflen 0 only releases it */
+typedef bool nlwriter_op_write(struct nlmsg_state *ns, char *buf, int buflen);
+
+/*
+ * Operations of one writer type (NS_WRITER_TYPE_*): one initializer and
+ * one flush routine per writer target (NS_WRITER_TARGET_*).
+ */
+struct nlwriter_ops {
+ nlwriter_op_init *init;
+ nlwriter_op_write *write_socket;
+ nlwriter_op_write *write_group;
+ nlwriter_op_write *write_chain;
+};
+
+/*
+ * NS_WRITER_TYPE_BUF
+ * Writes message to a temporary memory buffer,
+ * flushing to the socket/group when buffer size limit is reached
+ */
+/*
+ * Sets up @ns as a NS_WRITER_TYPE_BUF writer backed by @size bytes of
+ * zero-filled malloc'ed memory.  Returns false if the allocation fails.
+ */
+static bool
+nlmsg_get_ns_buf(struct nlmsg_state *ns, int size, bool waitok)
+{
+	const int mflag = waitok ? M_WAITOK : M_NOWAIT;
+	char *storage = malloc(size, M_NETLINK, mflag | M_ZERO);
+
+	ns->_buf = storage;
+	if (__predict_false(storage == NULL))
+		return (false);
+
+	ns->writer_type = NS_WRITER_TYPE_BUF;
+	ns->malloc_flag = mflag;
+	ns->data = storage;
+	ns->hdr = NULL;
+	ns->offset = 0;
+	ns->alloc_len = size;
+	return (true);
+}
+
+/*
+ * Flush routine of the BUF writer for a single socket: copies @datalen
+ * bytes from @buf into a freshly allocated mbuf chain and sends it to the
+ * socket stored in ns->arg.  @buf is always freed.
+ *
+ * Returns false if the mbuf chain cannot be allocated or filled, otherwise
+ * the result of nl_send_one().  With @datalen == 0 only @buf is released.
+ */
+static bool
+nlmsg_write_socket_buf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns);
+	if (__predict_false(datalen == 0)) {
+		free(buf, M_NETLINK);
+		return (true);
+	}
+
+	struct mbuf *m = m_getm2(NULL, datalen, ns->malloc_flag, MT_DATA, M_PKTHDR);
+	if (__predict_false(m == NULL)) {
+		/* XXX: should we set sorcverr? */
+		free(buf, M_NETLINK);
+		return (false);
+	}
+	bool success = m_append(m, datalen, buf) != 0;
+	free(buf, M_NETLINK);
+
+	/* Never send a partially filled chain. */
+	if (__predict_false(!success)) {
+		m_freem(m);
+		return (false);
+	}
+
+	return (nl_send_one(m, (struct nlpcb *)(ns->arg)));
+}
+
+/*
+ * Flush routine of the BUF writer for a group: copies @datalen bytes from
+ * @buf into a freshly allocated mbuf chain and hands it to
+ * nl_send_group() together with the group mask stored in ns->arg.
+ * @buf is always freed.
+ *
+ * Returns false if the mbuf chain cannot be allocated or filled.
+ * With @datalen == 0 only @buf is released.
+ */
+static bool
+nlmsg_write_group_buf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns->arg);
+	if (__predict_false(datalen == 0)) {
+		free(buf, M_NETLINK);
+		return (true);
+	}
+
+	struct mbuf *m = m_getm2(NULL, datalen, ns->malloc_flag, MT_DATA, M_PKTHDR);
+	if (__predict_false(m == NULL)) {
+		free(buf, M_NETLINK);
+		return (false);
+	}
+	bool success = m_append(m, datalen, buf) != 0;
+	free(buf, M_NETLINK);
+
+	if (!success) {
+		/* The chain is still ours: release it instead of leaking. */
+		m_freem(m);
+		return (false);
+	}
+
+	nl_send_group(m, (uint32_t)(uintptr_t)(ns->arg));
+	return (true);
+}
+
+/*
+ * Flush routine of the BUF writer for an mbuf chain: appends @datalen
+ * bytes from @buf to the chain whose head pointer is stored in ns->arg,
+ * allocating the chain on first use.
+ *
+ * @buf is freed on every path, like in the other BUF flush routines: the
+ * data has been copied and the writer drops its reference after a flush.
+ * Returns false if the chain cannot be allocated or extended; whatever is
+ * already linked at the head pointer is left for the caller to release.
+ */
+static bool
+nlmsg_write_chain_buf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	struct mbuf **m0 = (struct mbuf **)(ns->arg);
+	bool success = true;
+
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns->arg);
+
+	if (__predict_true(datalen != 0)) {
+		if (*m0 == NULL) {
+			*m0 = m_getm2(NULL, datalen, ns->malloc_flag, MT_DATA,
+			    M_PKTHDR);
+		}
+		if (__predict_false(*m0 == NULL ||
+		    m_append(*m0, datalen, buf) == 0))
+			success = false;
+	}
+
+	free(buf, M_NETLINK);
+	return (success);
+}
+
+
+/*
+ * NS_WRITER_TYPE_MBUF
+ * Writes message to the allocated mbuf,
+ * flushing to socket/group when mbuf size limit is reached.
+ * This is the most efficient mechanism as it avoids double-copying.
+ *
+ * Allocates a single mbuf suitable to store up to @size bytes of data.
+ * If size < MHLEN (around 160 bytes), allocates an mbuf with pkthdr
+ * If size <= MCLBYTES (2k), allocate a single mbuf cluster
+ * Otherwise, return NULL.
+ */
+/*
+ * Sets up @ns as a NS_WRITER_TYPE_MBUF writer that writes straight into
+ * the data area of a single mbuf obtained from m_get2().  The usable size
+ * is whatever trailing space that mbuf offers.
+ * Returns false if the mbuf cannot be allocated.
+ */
+static bool
+nlmsg_get_ns_mbuf(struct nlmsg_state *ns, int size, bool waitok)
+{
+	const int mflag = waitok ? M_WAITOK : M_NOWAIT;
+	struct mbuf *m = m_get2(size, mflag, MT_DATA, M_PKTHDR);
+
+	if (__predict_false(m == NULL))
+		return (false);
+
+	ns->_m = m;
+	ns->data = mtod(m, void *);
+	ns->alloc_len = M_TRAILINGSPACE(m);
+	ns->offset = 0;
+	ns->hdr = NULL;
+	ns->malloc_flag = mflag;
+	ns->writer_type = NS_WRITER_TYPE_MBUF;
+	RT_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
+	    m, size, ns->alloc_len, ns->data);
+	return (true);
+}
+
+/*
+ * Flush routine of the MBUF writer for a single socket.  @buf is the mbuf
+ * itself; its lengths are set to @datalen and it is sent to the socket
+ * stored in ns->arg.  With @datalen == 0 the mbuf is just released.
+ */
+static bool
+nlmsg_write_socket_mbuf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	struct mbuf *pkt = (struct mbuf *)buf;
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns->arg);
+
+	if (__predict_true(datalen != 0)) {
+		pkt->m_pkthdr.len = datalen;
+		pkt->m_len = datalen;
+		return (nl_send_one(pkt, (struct nlpcb *)(ns->arg)));
+	}
+
+	/* Nothing was written: only release the storage. */
+	m_freem(pkt);
+	return (true);
+}
+
+/*
+ * Flush routine of the MBUF writer for a group.  @buf is the mbuf itself;
+ * its lengths are set to @datalen and it is handed to nl_send_group()
+ * with the group mask stored in ns->arg.  With @datalen == 0 the mbuf is
+ * just released.  Always returns true.
+ */
+static bool
+nlmsg_write_group_mbuf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	struct mbuf *pkt = (struct mbuf *)buf;
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns->arg);
+
+	if (__predict_true(datalen != 0)) {
+		pkt->m_pkthdr.len = datalen;
+		pkt->m_len = datalen;
+		nl_send_group(pkt, (uint32_t)(uintptr_t)(ns->arg));
+	} else {
+		/* Nothing was written: only release the storage. */
+		m_freem(pkt);
+	}
+
+	return (true);
+}
+
+/*
+ * Flush routine of the MBUF writer for an mbuf chain.  @buf is the mbuf
+ * itself; it becomes the head of the chain pointed to by ns->arg, or is
+ * linked after the last mbuf of an existing chain, whose packet length is
+ * increased by @datalen.  With @datalen == 0 the mbuf is just released.
+ * Always returns true.
+ */
+static bool
+nlmsg_write_chain_mbuf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	struct mbuf *m_new = (struct mbuf *)buf;
+	struct mbuf **m0 = (struct mbuf **)(ns->arg);
+
+	RT_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, ns->arg);
+
+	if (__predict_false(datalen == 0)) {
+		m_freem(m_new);
+		return (true);
+	}
+
+	m_new->m_pkthdr.len = datalen;
+	m_new->m_len = datalen;
+
+	struct mbuf *head = *m0;
+	if (head == NULL) {
+		*m0 = m_new;
+		return (true);
+	}
+
+	/* Walk to the tail and link the new mbuf there. */
+	struct mbuf *tail = head;
+	while (tail->m_next != NULL)
+		tail = tail->m_next;
+	tail->m_next = m_new;
+	head->m_pkthdr.len += datalen;
+
+	return (true);
+}
+
+/*
+ * NS_WRITER_TYPE_LBUF
+ * Writes message to the allocated memory buffer,
+ * flushing to socket/group when mbuf size limit is reached.
+ * Calls linux handler to rewrite messages before sending to the socket.
+ */
+/*
+ * Sets up @ns as a NS_WRITER_TYPE_LBUF writer.  A single zero-filled
+ * allocation is laid out as:
+ *   struct linear_buffer | size bytes of message data |
+ *   size + SCRATCH_BUFFER_SIZE bytes described by the linear buffer
+ * where size is @size rounded up to a multiple of the pointer size.
+ * Returns false if the allocation fails.
+ */
+static bool
+nlmsg_get_ns_lbuf(struct nlmsg_state *ns, int size, bool waitok)
+{
+	const int mflag = waitok ? M_WAITOK : M_NOWAIT;
+	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
+
+	size = roundup2(size, sizeof(void *));
+	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
+	if (__predict_false(buf == NULL))
+		return (false);
+
+	/* The linear buffer header sits at the very start of the allocation */
+	struct linear_buffer *lb = (struct linear_buffer *)buf;
+	lb->size = size + SCRATCH_BUFFER_SIZE;
+	lb->base = &buf[sizeof(struct linear_buffer) + size];
+
+	ns->_buf = buf;
+	ns->data = (char *)(lb + 1);
+	ns->alloc_len = size;
+	ns->offset = 0;
+	ns->hdr = NULL;
+	ns->writer_type = NS_WRITER_TYPE_LBUF;
+	ns->malloc_flag = mflag;
+	return (true);
+}
+
+
+/*
+ * Flush routine of the LBUF writer for a single socket: passes the
+ * collected messages through nlmsgs_to_linux() and sends the resulting
+ * mbuf chain to the socket stored in ns->arg.  @buf is always freed.
+ * Returns false if the translation produced no chain.
+ */
+static bool
+nlmsg_write_socket_lbuf(struct nlmsg_state *ns, char *buf, int datalen)
+{
+	struct nlpcb *nlp = (struct nlpcb *)(ns->arg);
+	struct linear_buffer *lb = (struct linear_buffer *)buf;
+	struct mbuf *m;
+
+	if (__predict_false(datalen == 0)) {
+		free(buf, M_NETLINK);
+		return (true);
+	}
+
+	/* Message data starts right after the linear buffer header */
+	m = nlmsgs_to_linux((char *)(lb + 1), datalen, nlp, lb);
+	free(buf, M_NETLINK);
+
+	if (__predict_true(m != NULL))
+		return (nl_send_one(m, nlp));
+
+	/* XXX: should we set sorcverr? */
+	return (false);
+}
+
+/*
+ * Flush routine of the LBUF writer for a group: copies the collected
+ * messages, untranslated, into a freshly allocated mbuf chain and hands
+ * it to nl_send_group() with the group mask stored in ns->arg.
+ * @buf is always freed.  Returns false if the chain cannot be allocated
+ * or filled.
+ *
+ * Shouldn't be called (maybe except Linux code originating message)
+ */
+static bool
+nlmsg_write_group_lbuf(struct nlmsg_state *ns,char *buf, int datalen)
+{
+	struct linear_buffer *lb = (struct linear_buffer *)buf;
+	char *data = (char *)(lb + 1);
+
+	if (__predict_false(datalen == 0)) {
+		free(buf, M_NETLINK);
+		return (true);
+	}
+
+	struct mbuf *m = m_getm2(NULL, datalen, ns->malloc_flag, MT_DATA, M_PKTHDR);
+	if (__predict_false(m == NULL)) {
+		free(buf, M_NETLINK);
+		return (false);
+	}
+	bool success = m_append(m, datalen, data) != 0;
+	free(buf, M_NETLINK);
+
+	/* Never send a partially filled chain. */
+	if (__predict_false(!success)) {
+		m_freem(m);
+		return (false);
+	}
+
+	nl_send_group(m, (uint32_t)(uintptr_t)(ns->arg));
+	return (true);
+}
+
+/*
+ * Writer operations, indexed by NS_WRITER_TYPE_*.
+ * The LBUF entry has no write_chain routine, and there is no entry for
+ * NS_WRITER_TYPE_MBUFC.
+ */
+struct nlwriter_ops nlmsg_writers[] = {
+ /* NS_WRITER_TYPE_MBUF */
+ {
+ .init = nlmsg_get_ns_mbuf,
+ .write_socket = nlmsg_write_socket_mbuf,
+ .write_group = nlmsg_write_group_mbuf,
+ .write_chain = nlmsg_write_chain_mbuf,
+ },
+ /* NS_WRITER_TYPE_BUF */
+ {
+ .init = nlmsg_get_ns_buf,
+ .write_socket = nlmsg_write_socket_buf,
+ .write_group = nlmsg_write_group_buf,
+ .write_chain = nlmsg_write_chain_buf,
+ },
+ /* NS_WRITER_TYPE_LBUF */
+ {
+ .init = nlmsg_get_ns_lbuf,
+ .write_socket = nlmsg_write_socket_lbuf,
+ .write_group = nlmsg_write_group_lbuf,
+ },
+};
+
+/*
+ * Selects the flush callback of @ns from its writer type and writer
+ * target.  Panics on an unknown target.
+ */
+static void
+nlmsg_set_callback(struct nlmsg_state *ns)
+{
+	struct nlwriter_ops *pops = &nlmsg_writers[ns->writer_type];
+	const int target = ns->writer_target;
+
+	if (target == NS_WRITER_TARGET_SOCKET)
+		ns->cb = pops->write_socket;
+	else if (target == NS_WRITER_TARGET_GROUP)
+		ns->cb = pops->write_group;
+	else if (target == NS_WRITER_TARGET_CHAIN)
+		ns->cb = pops->write_chain;
+	else
+		panic("not implemented");
+}
+
+/*
+ * Initializes @ns with storage of the given writer @type by calling the
+ * matching init routine from nlmsg_writers.
+ */
+static bool
+nlmsg_get_buf_type(struct nlmsg_state *ns, int size, int type, bool waitok)
+{
+	nlwriter_op_init *init_fn;
+
+	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
+	RT_LOG(LOG_DEBUG3, "Setting up ns %p size %d type %d", ns, size, type);
+	init_fn = nlmsg_writers[type].init;
+	return (init_fn(ns, size, waitok));
+}
+
+/*
+ * Picks the writer type for a request of @size bytes and initializes @ns
+ * with it: Linux consumers always get the LBUF writer, others get the
+ * MBUF writer up to MCLBYTES and the BUF writer above that.
+ */
+static bool
+nlmsg_get_buf(struct nlmsg_state *ns, int size, bool waitok, bool is_linux)
+{
+	int type = NS_WRITER_TYPE_LBUF;
+
+	if (!is_linux) {
+		type = __predict_true(size <= MCLBYTES) ?
+		    NS_WRITER_TYPE_MBUF : NS_WRITER_TYPE_BUF;
+	}
+
+	return (nlmsg_get_buf_type(ns, size, type, waitok));
+}
+
+/*
+ * Prepares @ns for writing up to @size bytes destined for the socket
+ * @nlp.  Storage is allocated without sleeping.
+ * Returns false if the storage cannot be allocated.
+ */
+bool
+nlmsg_get_socket_writer(int size, struct nlpcb *nlp, struct nlmsg_state *ns)
+{
+	bool have_buf = nlmsg_get_buf(ns, size, false, nlp->nl_linux);
+
+	if (have_buf) {
+		ns->writer_target = NS_WRITER_TARGET_SOCKET;
+		ns->arg = (void *)nlp;
+		nlmsg_set_callback(ns);
+	}
+
+	return (have_buf);
+}
+
+/*
+ * Prepares @ns for writing up to @size bytes destined for the groups in
+ * @group_mask.  Storage is allocated without sleeping and is never of the
+ * Linux-translating kind.
+ * Returns false if the storage cannot be allocated.
+ */
+bool
+nlmsg_get_group_writer(int size, uint32_t group_mask, struct nlmsg_state *ns)
+{
+	bool have_buf = nlmsg_get_buf(ns, size, false, false);
+
+	if (have_buf) {
+		ns->writer_target = NS_WRITER_TARGET_GROUP;
+		ns->arg = (void *)(uintptr_t)group_mask;
+		nlmsg_set_callback(ns);
+	}
+
+	return (have_buf);
+}
+
+/*
+ * Prepares @ns for writing up to @size bytes that are collected into an
+ * mbuf chain.  On success *@pm is reset to NULL and later receives the
+ * head of the chain when data is flushed.
+ * Returns false, leaving *@pm untouched, if storage cannot be allocated.
+ */
+bool
+nlmsg_get_chain_writer(int size, struct mbuf **pm, struct nlmsg_state *ns)
+{
+	bool have_buf = nlmsg_get_buf(ns, size, false, false);
+
+	if (!have_buf)
+		return (false);
+
+	*pm = NULL;
+	ns->writer_target = NS_WRITER_TARGET_CHAIN;
+	ns->arg = (void *)pm;
+	nlmsg_set_callback(ns);
+	RT_LOG(LOG_DEBUG3, "setup cb %p (need %p)", ns->cb, &nlmsg_write_chain_mbuf);
+	return (true);
+}
+
+/*
+ * Releases the storage currently held by @ns without sending anything:
+ * the flush callback is invoked with a data length of 0, which every
+ * callback treats as "free the storage object".
+ */
+void
+nlmsg_free(struct nlmsg_state *ns)
+{
+ ns->cb(ns, ns->_buf, 0);
+}
+
+/*
+ * Hands all completed messages collected in @ns to the flush callback.
+ * A message that was started but not ended is dropped.  The callback
+ * receives the storage object, so @ns no longer references it afterwards.
+ * Returns the callback's result.
+ */
+bool
+nlmsg_flush(struct nlmsg_state *ns)
+{
+	bool result;
+
+	if (__predict_false(ns->hdr != NULL)) {
+		/* Last message has not been completed, skip it. */
+		int completed_len = (char *)ns->hdr - ns->data;
+
+		/* Send completed messages */
+		ns->offset = completed_len;
+		ns->hdr = NULL;
+	}
+
+	result = ns->cb(ns, ns->_buf, ns->offset);
+	ns->_buf = NULL;
+
+	if (!result) {
+		RT_LOG(LOG_DEBUG, "ns %p offset %d: flush with %p() failed", ns, ns->offset, ns->cb);
+	}
+
+	return (result);
+}
+
+/*
+ * Makes room in @ns by switching it to freshly allocated storage.
+ *
+ * Completed messages in the old storage are flushed through the callback;
+ * the bytes of a message that is still being built are carried over to
+ * the start of the new storage.  On success @ns describes the new
+ * storage, so pointers into the old one are no longer valid.
+ *
+ * Returns false, leaving @ns untouched, if the new storage cannot be
+ * allocated.
+ */
+static __noinline bool
+clear_storage(struct nlmsg_state *ns)
+{
+ struct nlmsg_state ns_new = {};
+ int completed_len, new_len;
+ RT_LOG(LOG_DEBUG2, "realloc storage: used %d/%d bytes", ns->offset, ns->alloc_len);
+
+ /* Calculate the new buffer size and allocate it */
+ completed_len = (ns->hdr != NULL) ? (char *)ns->hdr - ns->data : ns->offset;
+ if (completed_len > 0) {
+ /* We already ran out of space, use the largest effective size */
+ new_len = max(ns->alloc_len, MCLBYTES);
+ } else {
+ /* Nothing can be flushed to make room, so the storage has to grow */
+ if (ns->alloc_len < MCLBYTES)
+ new_len = MCLBYTES;
+ else
+ new_len = ns->alloc_len * 2;
+ }
+ bool waitok = ns->malloc_flag == M_WAITOK;
+ bool is_linux = ns->writer_type == NS_WRITER_TYPE_LBUF;
+ if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux))
+ return (false);
+
+ /* Update callback data */
+ ns_new.writer_target = ns->writer_target;
+ nlmsg_set_callback(&ns_new);
+ ns_new.arg = ns->arg;
+
+ /* Copy last (unfinished) header to the new storage */
+ int last_len = ns->offset - completed_len;
+ if (last_len > 0) {
+ memcpy(ns_new.data, ns->hdr, last_len);
+ ns_new.hdr = (struct nlmsghdr *)ns_new.data;
+ ns_new.offset = last_len;
+ }
+
+ RT_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
+
+ /* Flush completed headers */
+ if (completed_len > 0) {
+ RT_LOG(LOG_DEBUG2, "Flushing completed %d bytes", completed_len);
+ /* Trim the unfinished message so that only complete ones are sent */
+ ns->offset -= last_len;
+ ns->hdr = NULL;
+ /* NOTE(review): the flush result is ignored here */
+ nlmsg_flush(ns);
+ }
+
+ /* Update state */
+ memcpy(ns, &ns_new, sizeof(struct nlmsg_state));
+ RT_LOG(LOG_DEBUG2, "switched mbuf: used %d/%d bytes", ns->offset, ns->alloc_len);
+
+ return (true);
+}
+
+/*
+ * Reserves @sz bytes (rounded up with NLMSG_ALIGN) at the current write
+ * offset of @ns and returns a pointer to them, obtaining more storage
+ * when needed.  The reserved bytes are not cleared here.
+ *
+ * Returns NULL if the writer cannot provide enough room.
+ * Note it MAY invalidate any previous pointers fetched.
+ */
+void *
+nlmsg_reserve_data_raw(struct nlmsg_state *ns, size_t sz)
+{
+	/* Check for the same amount the offset is advanced by below. */
+	const size_t aligned_sz = NLMSG_ALIGN(sz);
+
+	/*
+	 * One round of reclaiming does not guarantee that the request
+	 * fits, so re-check after every round.  Give up if a round did not
+	 * yield any additional room.
+	 */
+	while (__predict_false(ns->offset + aligned_sz > ns->alloc_len)) {
+		int room = ns->alloc_len - ns->offset;
+
+		if (!clear_storage(ns))
+			return (NULL);
+		if (ns->alloc_len - ns->offset <= room)
+			return (NULL);
+	}
+
+	void *data_ptr = &ns->data[ns->offset];
+
+	RT_LOG(LOG_DEBUG3, "add data at offset %d, buf %p data_ptr %p",
+	    ns->offset, ns->data, data_ptr);
+
+	ns->offset += aligned_sz;
+
+	return (data_ptr);
+}
+
+/*
+ * Starts a new message in @ns: writes a netlink header with the given
+ * port id, sequence number, type and flags, and remembers it as the
+ * message under construction.  @len is stored as the provisional message
+ * length and is also used to make sure that header plus @len bytes fit
+ * before the header is written.
+ *
+ * Returns false if the writer cannot provide enough room.
+ */
+bool
+nlmsg_put(struct nlmsg_state *ns, uint32_t portid, uint32_t seq, uint16_t type,
+    uint16_t flags, uint32_t len)
+{
+	const size_t required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
+	struct nlmsghdr *hdr;
+
+	/*
+	 * One round of reclaiming does not guarantee that the request
+	 * fits, so re-check after every round.  Give up if a round did not
+	 * yield any additional room.
+	 */
+	while (__predict_false(ns->offset + required_len > ns->alloc_len)) {
+		int room = ns->alloc_len - ns->offset;
+
+		if (!clear_storage(ns))
+			return (false);
+		if (ns->alloc_len - ns->offset <= room)
+			return (false);
+	}
+
+	hdr = (struct nlmsghdr *)(&ns->data[ns->offset]);
+
+	hdr->nlmsg_len = len;
+	hdr->nlmsg_type = type;
+	hdr->nlmsg_flags = flags;
+	hdr->nlmsg_seq = seq;
+	hdr->nlmsg_pid = portid;
+
+	ns->hdr = hdr;
+	ns->offset += sizeof(struct nlmsghdr);
+
+	return (true);
+}
+
+/*
+ * Completes the message under construction: its nlmsg_len is set to the
+ * number of bytes written since its header, and the writer no longer has
+ * a message in progress.  Must only be called while a message is being
+ * built, as ns->hdr is dereferenced unconditionally.
+ */
+void
+nlmsg_end(struct nlmsg_state *ns)
+{
+ ns->hdr->nlmsg_len = (uint32_t)(ns->data + ns->offset - (char *)ns->hdr);
+ ns->hdr = NULL;
+}
+
+/*
+ * Discards the message under construction, if any, by rewinding the write
+ * offset to the start of its header.
+ */
+void
+nlmsg_abort(struct nlmsg_state *ns)
+{
+	if (ns->hdr == NULL)
+		return;
+
+	ns->offset = (uint32_t)((char *)ns->hdr - ns->data);
+	ns->hdr = NULL;
+}
+
+/*
+ * Slow path of nla_put(): called when the attribute does not fit into the
+ * current storage of @ns.  Logs the shortage and tries to obtain more
+ * room; the attribute itself is not written here.
+ * Returns false if no new storage could be obtained.
+ */
+bool
+nla_put_handle_oom(struct nlmsg_state *ns, int attr_type, int attr_len,
+    const void *data)
+{
+	int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+	bool reclaimed;
+
+	RT_LOG(LOG_DEBUG3,
+	    "no space at offset %d (want %d), alloc_len %d, trying to reclaim",
+	    ns->offset, required_len, ns->alloc_len);
+	reclaimed = clear_storage(ns);
+	return (reclaimed);
+}
Index: sys/netlink/netlink_module.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_module.c
@@ -0,0 +1,208 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/mbuf.h>
+#include <sys/ck.h>
+#include <sys/syslog.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+
+MALLOC_DEFINE(M_NETLINK, "netlink", "Memory used for netlink packets");
+/* Debug logging setup for this file (module name nl_mod). */
+#define DEBUG_MOD_NAME nl_mod
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+/* Root of the net.netlink sysctl tree. */
+SYSCTL_NODE(_net, OID_AUTO, netlink, CTLFLAG_RD, 0, "");
+/* Handlers indexed by netlink protocol number; NULL means "not registered". */
+nl_handler nl_handlers[NL_MAX_HANDLERS];
+/* List of all per-VNET control blocks; modified and walked under nlsock_mtx. */
+CK_LIST_HEAD(nl_control_head, nl_control);
+static struct nl_control_head vnets_head = CK_LIST_HEAD_INITIALIZER();
+/* Per-VNET control block; NULL until vnet_nl_ctl_init() installs one. */
+VNET_DEFINE(struct nl_control *, nl_ctl) = NULL;
+/* Global mutex behind NL_GLOBAL_LOCK(); initialized by the MTX_SYSINIT below. */
+struct mtx nlsock_mtx;
+MTX_SYSINIT(nlsock, &nlsock_mtx, "nlsock for handlers or portid list lock", MTX_DEF);
+/* NOTE(review): NL_GLOBAL_LOCK_INIT() would initialize the mutex a second time on top of MTX_SYSINIT; confirm it is still needed. */
+#define NL_GLOBAL_LOCK_INIT() mtx_init(&nlsock_mtx, "nlsock global mtx", NULL, MTX_DEF)
+#define NL_GLOBAL_LOCK() mtx_lock(&nlsock_mtx)
+#define NL_GLOBAL_UNLOCK() mtx_unlock(&nlsock_mtx)
+/* Set to 1 by netlink_modevent() once an unload request has been accepted. */
+int netlink_unloading = 0;
+
+/*
+ * Releases a control block: destroys its rmlock, then frees the memory.
+ */
+static void
+free_nl_ctl(struct nl_control *ctl)
+{
+	struct rmlock *lock = &ctl->ctl_lock;
+
+	rm_destroy(lock);
+	free(ctl, M_NETLINK);
+}
+
+/*
+ * Creates the control block of the current VNET and links it into the
+ * global list.  The block is allocated and set up before the global lock is
+ * taken; if another instance was installed in the meantime, the fresh one is
+ * dropped.
+ */
+void
+vnet_nl_ctl_init(void)
+{
+	struct nl_control *new_ctl;
+
+	new_ctl = malloc(sizeof(*new_ctl), M_NETLINK, M_WAITOK | M_ZERO);
+	rm_init(&new_ctl->ctl_lock, "netlink lock");
+	CK_LIST_INIT(&new_ctl->ctl_port_head);
+	CK_LIST_INIT(&new_ctl->ctl_pcb_head);
+
+	NL_GLOBAL_LOCK();
+
+	if (V_nl_ctl != NULL) {
+		/* Somebody else won the race: keep theirs, discard ours. */
+		RT_LOG(LOG_DEBUG, "per-VNET init clash, dropping this instance");
+		free_nl_ctl(new_ctl);
+	} else {
+		V_nl_ctl = new_ctl;
+		CK_LIST_INSERT_HEAD(&vnets_head, new_ctl, ctl_next);
+		RT_LOG(LOG_DEBUG2, "VNET %p init done, inserted %p into global list",
+		    curvnet, new_ctl);
+	}
+
+	NL_GLOBAL_UNLOCK();
+}
+
+/*
+ * Detaches the control block of the current VNET from the global list
+ * (under the global lock) and frees it once the lock has been released.
+ * Does nothing beyond logging when no control block is installed.
+ */
+static void
+vnet_nl_ctl_destroy(void)
+{
+	struct nl_control *old_ctl;
+
+	NL_GLOBAL_LOCK();
+	old_ctl = V_nl_ctl;
+	V_nl_ctl = NULL;
+	RT_LOG(LOG_DEBUG2, "Removing %p from global list", old_ctl);
+	if (old_ctl != NULL) {
+		CK_LIST_REMOVE(old_ctl, ctl_next);
+	}
+	NL_GLOBAL_UNLOCK();
+
+	if (old_ctl == NULL)
+		return;
+
+	free_nl_ctl(old_ctl);
+}
+
+/*
+ * Checks whether a netlink protocol number can be used.
+ *
+ * Returns EINVAL when proto is outside [0, NL_MAX_HANDLERS),
+ * EPROTONOSUPPORT when no handler is registered for it, and 0 otherwise.
+ */
+int
+nl_verify_proto(int proto)
+{
+	if (proto >= 0 && proto < NL_MAX_HANDLERS)
+		return (nl_handlers[proto] != NULL ? 0 : EPROTONOSUPPORT);
+
+	return (EINVAL);
+}
+
+/*
+ * Installs `handler` as the handler for netlink protocol `proto`.
+ *
+ * Returns false when proto is out of range, true otherwise.  Registering a
+ * protocol that already has a handler trips a KASSERT.
+ */
+bool
+netlink_register_proto(int proto, nl_handler handler)
+{
+	bool in_range = (proto >= 0) && (proto < NL_MAX_HANDLERS);
+
+	if (!in_range)
+		return (false);
+
+	NL_GLOBAL_LOCK();
+	KASSERT((nl_handlers[proto] == NULL), ("netlink handler %d is already set", proto));
+	nl_handlers[proto] = handler;
+	NL_GLOBAL_UNLOCK();
+
+	RT_LOG(LOG_DEBUG, "Registered netlink proto %d handler", proto);
+	return (true);
+}
+
+/*
+ * Removes the handler of netlink protocol `proto`.
+ *
+ * Returns false when proto is out of range, true otherwise.  Unregistering
+ * a protocol that has no handler trips a KASSERT.
+ */
+bool
+netlink_unregister_proto(int proto)
+{
+	bool in_range = (proto >= 0) && (proto < NL_MAX_HANDLERS);
+
+	if (!in_range)
+		return (false);
+
+	NL_GLOBAL_LOCK();
+	KASSERT((nl_handlers[proto] != NULL), ("netlink handler %d is not set", proto));
+	nl_handlers[proto] = NULL;
+	NL_GLOBAL_UNLOCK();
+
+	RT_LOG(LOG_DEBUG, "Unregistered netlink proto %d handler", proto);
+	return (true);
+}
+
+
+
+/*
+ * Tells whether the module may be unloaded: walks every per-VNET control
+ * block under the global lock and reports false as soon as one of them
+ * still has a non-empty socket (pcb) list.
+ */
+static bool
+can_unload(void)
+{
+	struct nl_control *ctl;
+	bool busy = false;
+
+	NL_GLOBAL_LOCK();
+
+	CK_LIST_FOREACH(ctl, &vnets_head, ctl_next) {
+		RT_LOG(LOG_DEBUG2, "Iterating VNET head %p", ctl);
+		if (CK_LIST_EMPTY(&ctl->ctl_pcb_head))
+			continue;
+		RT_LOG(LOG_NOTICE, "non-empty socket list in ctl %p", ctl);
+		busy = true;
+		break;
+	}
+
+	NL_GLOBAL_UNLOCK();
+
+	return (!busy);
+}
+
+/*
+ * Module event handler.
+ *
+ * MOD_LOAD only logs.  MOD_UNLOAD succeeds, and sets netlink_unloading,
+ * only when can_unload() agrees; otherwise EBUSY is returned.  Every other
+ * event yields EOPNOTSUPP.
+ */
+static int
+netlink_modevent(module_t mod __unused, int what, void *priv __unused)
+{
+	if (what == MOD_LOAD) {
+		RT_LOG(LOG_NOTICE, "Loading");
+		return (0);
+	}
+
+	if (what != MOD_UNLOAD)
+		return (EOPNOTSUPP);
+
+	RT_LOG(LOG_NOTICE, "Unload called");
+	if (!can_unload())
+		return (EBUSY);
+
+	RT_LOG(LOG_WARNING, "unloading");
+	netlink_unloading = 1;
+
+	return (0);
+}
+static moduledata_t netlink_mod = { "netlink", netlink_modevent, NULL };
+/* Register the "netlink" module, with netlink_modevent() as its event handler, and publish version 1. */
+DECLARE_MODULE(netlink, netlink_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(netlink, 1);
Index: sys/netlink/netlink_nhop.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_nhop.c
@@ -0,0 +1,304 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/ck.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_utils.h>
+
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+#include <netlink/netlink_route.h>
+
+#define DEBUG_MOD_NAME nl_nhop
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+/*
+ * User-defined nexthop: maps a userland-provided index to up to three
+ * kernel nexthop objects.
+ *
+ * idx -> {n:, d:, h:}
+ *
+ * find_unhop() picks slot 1 for NHF_HOST, slot 2 for NHF_DEFAULT and slot 0
+ * otherwise; slots found empty are filled on demand by cloning slot 0.
+ */
+struct user_nhop {
+ uint32_t un_idx; /* Userland-provided index */
+ struct nhop_object * un_nhop[3]; /* Normal, host, default */
+ struct user_nhop * un_next; /* Chain link, accessed through unhop_next() */
+ struct epoch_context un_epoch_ctx; /* epoch_call() context for deferred destruction */
+};
+
+/* produce hash value for an object (its userland index) */
+#define unhop_hash_obj(_obj) (hash_unhop(_obj))
+/* compare two objects; non-zero means the userland indices are equal */
+#define unhop_cmp(_one, _two) (cmp_unhop(_one, _two))
+/* next object accessor */
+#define unhop_next(_obj) (_obj)->un_next
+/* Instantiate the "unhop" table type from the three callbacks above. */
+CHT_SLIST_DEFINE(unhop, struct user_nhop);
+/* Per-VNET table head; NULL until init_unhops() installs it. */
+VNET_DEFINE_STATIC(struct unhop_head *, nl_nhop_head) = NULL;
+#define V_nl_nhop_head VNET(nl_nhop_head)
+/* Forward declarations of helpers defined later in this file. */
+static void consider_resize(uint32_t new_gr_buckets);
+static int clone_unhop(const struct nhop_object *nh_base, int nh_flags,
+ struct nhop_object **pnh);
+
+static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b);
+static unsigned int hash_unhop(const struct user_nhop *obj);
+
+/*
+ * Comparison callback: returns non-zero when both objects carry the same
+ * userland index and zero otherwise.
+ */
+static int
+cmp_unhop(const struct user_nhop *a, const struct user_nhop *b)
+{
+	int same_idx = (a->un_idx == b->un_idx);
+
+	return (same_idx);
+}
+
+/*
+ * Hash callback: the userland index itself serves as the hash value.
+ */
+static unsigned int
+hash_unhop(const struct user_nhop *obj)
+{
+	unsigned int hash = obj->un_idx;
+
+	return (hash);
+}
+
+/*
+ * Looks up the user nexthop with index @uidx and stores in @pnhop the kernel
+ * nexthop matching the NHF_HOST / NHF_DEFAULT bits of @nh_flags.  When that
+ * variant does not exist yet it is cloned from slot 0 and published.
+ *
+ * Returns 0 on success, ESRCH when @uidx is unknown, or the error reported
+ * by clone_unhop().  @pnhop is left untouched on failure.
+ *
+ * The control read lock is taken and released exactly once on every path.
+ *
+ * NOTE(review): the original comment read "Returns object referenced and
+ * unlocked", but no reference is taken on the returned nexthop here --
+ * confirm the intended ownership with the callers.
+ */
+static int
+find_unhop(uint32_t uidx, int nh_flags, struct nhop_object **pnhop)
+{
+	int error = 0;
+	CTL_TRACKER;
+
+	struct user_nhop key = { .un_idx = uidx }, *unhop;
+	struct nhop_object *nhop;
+	int off = 0;
+
+	nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT);
+
+	CTL_RLOCK();
+	CHT_SLIST_FIND_BYOBJ(V_nl_nhop_head, unhop, &key, unhop);
+	if (unhop == NULL) {
+		error = ESRCH;
+		goto done;
+	}
+
+	/* Select the slot: 1 for host, 2 for default, 0 for anything else. */
+	switch (nh_flags) {
+	case NHF_HOST:
+		off = 1;
+		break;
+	case NHF_DEFAULT:
+		off = 2;
+		break;
+	}
+
+	nhop = unhop->un_nhop[off];
+	if (nhop == NULL) {
+		/* Nexthop with the required flags does not exist yet. */
+		error = clone_unhop(unhop->un_nhop[0], nh_flags, &nhop);
+		if (error != 0)
+			goto done;
+
+		/*
+		 * Nexthops remain constant once set and get dereferenced
+		 * only when unhop is deleted.  If another thread published
+		 * its clone first, drop ours and use the published one.
+		 */
+		if (!atomic_cmpset_ptr((uintptr_t *)&unhop->un_nhop[off],
+		    (uintptr_t)NULL, (uintptr_t)nhop)) {
+			nhop_free_any(nhop);
+			nhop = atomic_load_ptr(&unhop->un_nhop[off]);
+		}
+	}
+	*pnhop = nhop;
+done:
+	/* Single unlock point: the original unlocked twice on the clone path. */
+	CTL_RUNLOCK();
+	return (error);
+}
+
+/*
+ * Returns the routing table head selected by the fib number and upper
+ * address family of @nh.
+ */
+static struct rib_head *
+nhop_get_rnh(const struct nhop_object *nh)
+{
+	struct rib_head *rnh;
+
+	rnh = rt_tables_get_rnh(nhop_get_fibnum(nh), nhop_get_upper_family(nh));
+
+	return (rnh);
+}
+
+/* Groups with at most this many members are cloned using on-stack storage. */
+#define MAX_STACK_NHOPS 4
+
+/*
+ * Produces the variant of @nh_base that carries @nh_flags and stores it in
+ * @pnh.
+ *
+ * A plain nexthop is cloned with nhop_clone_flags().  For a nexthop group,
+ * every member is resolved through find_unhop() using the member's userland
+ * index, and a group is then requested for the resolved members with their
+ * original weights.
+ *
+ * Returns 0 on success, EINVAL for a group without members, ESRCH when a
+ * member has no userland index, ENOMEM when temporary storage cannot be
+ * allocated, or the error reported by the called helpers.
+ */
+static int
+clone_unhop(const struct nhop_object *nh_base, int nh_flags, struct nhop_object **pnh)
+{
+	const struct weightened_nhop *wn;
+	struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS];
+	uint32_t num_nhops;
+	int error = 0;
+
+	if (!NH_IS_NHGRP(nh_base)) {
+		return (nhop_clone_flags(nh_base, nh_flags, pnh));
+	}
+
+	const struct nhgrp_object *nhg_base = (const struct nhgrp_object *)nh_base;
+	wn = nhgrp_get_nhops(nhg_base, &num_nhops);
+
+	/*
+	 * With no members the loop below would not run, leaving both the
+	 * error code and wn_new[0] unset before they are read.
+	 */
+	if (num_nhops == 0)
+		return (EINVAL);
+
+	if (num_nhops > MAX_STACK_NHOPS) {
+		wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT);
+		if (wn_new == NULL)
+			return (ENOMEM);
+	} else
+		wn_new = wn_base;
+
+	for (uint32_t i = 0; i < num_nhops; i++) {
+		uint32_t uidx = nhop_get_uidx(wn[i].nh);
+		if (uidx == 0) {
+			/* Member was not created through a userland index. */
+			error = ESRCH;
+			break;
+		}
+		error = find_unhop(uidx, nh_flags, &wn_new[i].nh);
+		if (error != 0)
+			break;
+		wn_new[i].weight = wn[i].weight;
+	}
+
+	if (error == 0) {
+		struct rib_head *rh = nhop_get_rnh(wn_new[0].nh);
+		error = nhgrp_get_group(rh, wn_new, num_nhops,
+		    (struct nhgrp_object **)pnh);
+	}
+
+	if (wn_new != wn_base)
+		free(wn_new, M_TEMP);
+	return (error);
+}
+
+/*
+ * Epoch callback scheduled by delete_unhop(): releases every nexthop held by
+ * the user nexthop and frees the object itself.
+ */
+static void
+destroy_unhop_epoch(epoch_context_t ctx)
+{
+	struct user_nhop *unhop;
+
+	unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx);
+
+	for (int i = 0; i < 3; i++) {
+		/*
+		 * find_unhop() fills slots on demand, so a slot may never
+		 * have been populated; skip those instead of handing NULL
+		 * to nhop_free_any().
+		 */
+		if (unhop->un_nhop[i] != NULL)
+			nhop_free_any(unhop->un_nhop[i]);
+	}
+	free(unhop, M_NETLINK);
+}
+
+
+/*
+ * Unlinks @unhop from the per-VNET table under the control write lock and
+ * schedules its destruction through epoch_call().  A failed lookup is logged
+ * and asserted against.
+ */
+static void
+delete_unhop(struct user_nhop *unhop)
+{
+	struct user_nhop *removed;
+
+	CTL_WLOCK();
+	CHT_SLIST_REMOVE(V_nl_nhop_head, unhop, unhop, removed);
+	CTL_WUNLOCK();
+
+	if (removed == NULL) {
+		RT_LOG(LOG_DEBUG, "unable to find unhop %u", unhop->un_idx);
+	}
+	MPASS(unhop == removed);
+
+	epoch_call(net_epoch_preempt, destroy_unhop_epoch,
+	    &unhop->un_epoch_ctx);
+}
+
+
+/*
+ * Resizes the per-VNET user nexthop table to @new_gr_bucket buckets.
+ *
+ * A request of 0 is a no-op.  The new bucket storage is allocated without
+ * sleeping before the write lock is taken; on allocation failure the table
+ * is left as is.  Whatever pointer CHT_SLIST_RESIZE() leaves in the local
+ * variable is freed after the lock has been dropped.
+ */
+static void
+consider_resize(uint32_t new_gr_bucket)
+{
+	void *bucket_mem;
+	size_t bucket_bytes;
+
+	if (new_gr_bucket == 0)
+		return;
+
+	bucket_bytes = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket);
+	bucket_mem = malloc(bucket_bytes, M_NETLINK, M_NOWAIT | M_ZERO);
+	if (bucket_mem == NULL)
+		return;
+
+	CTL_WLOCK();
+	CHT_SLIST_RESIZE(V_nl_nhop_head, unhop, bucket_mem, new_gr_bucket);
+	CTL_WUNLOCK();
+
+	if (bucket_mem != NULL)
+		free(bucket_mem, M_NETLINK);
+}
+
+/*
+ * Allocates the per-VNET user nexthop table (head plus 16 buckets) and
+ * installs it under the control write lock.  If a table was installed
+ * concurrently, the freshly allocated one is released again.
+ *
+ * Returns true when a table is in place on return, false when either
+ * allocation fails; nothing is leaked in the failure case.
+ */
+static bool __noinline
+init_unhops(void)
+{
+	uint32_t num_buckets = 16;
+	size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
+	struct unhop_head *phead;
+	void *ptr;
+
+	phead = malloc(sizeof(struct unhop_head), M_NETLINK, M_NOWAIT | M_ZERO);
+	if (phead == NULL)
+		return (false);
+
+	ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO);
+	if (ptr == NULL) {
+		/* Do not leak the head when the bucket array cannot be had. */
+		free(phead, M_NETLINK);
+		return (false);
+	}
+	CHT_SLIST_INIT(phead, ptr, num_buckets);
+
+	CTL_WLOCK();
+	if (V_nl_nhop_head == NULL)
+		V_nl_nhop_head = phead;
+	else {
+		/* Lost the race against another initializer. */
+		free(ptr, M_NETLINK);
+		free(phead, M_NETLINK);
+	}
+	CTL_WUNLOCK();
+
+	return (true);
+}
+
+
+/*
+ * Handler for the "new nexthop" request.  Currently it only makes sure the
+ * per-VNET user nexthop table exists; hdr, nlp and npt are not examined.
+ *
+ * Returns ENOMEM when the table cannot be set up, 0 otherwise.
+ */
+int
+rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
+    struct netlink_parse_tracker *npt)
+{
+	if (__predict_false(V_nl_nhop_head == NULL)) {
+		if (!init_unhops())
+			return (ENOMEM);
+	}
+
+	return (0);
+}
+
+
+
+
Index: sys/netlink/netlink_route.h
===================================================================
--- /dev/null
+++ sys/netlink/netlink_route.h
@@ -0,0 +1,890 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _NETLINK_NETLINK_ROUTE_H_
+#define _NETLINK_NETLINK_ROUTE_H_
+
+#include <sys/types.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+
+/*
+ * Messages defined by the NETLINK_ROUTE subsystem.
+ *
+ * Every enumerator is additionally defined as a macro expanding to itself,
+ * which makes each name visible to #ifdef.
+ */
+
+enum {
+ NL_RTM_BASE = 16,
+#define NL_RTM_BASE NL_RTM_BASE
+ NL_RTM_NEWLINK = 16,
+#define NL_RTM_NEWLINK NL_RTM_NEWLINK
+ NL_RTM_DELLINK,
+#define NL_RTM_DELLINK NL_RTM_DELLINK
+ NL_RTM_GETLINK,
+#define NL_RTM_GETLINK NL_RTM_GETLINK
+ NL_RTM_SETLINK,
+#define NL_RTM_SETLINK NL_RTM_SETLINK
+ NL_RTM_NEWADDR = 20,
+#define NL_RTM_NEWADDR NL_RTM_NEWADDR
+ NL_RTM_DELADDR,
+#define NL_RTM_DELADDR NL_RTM_DELADDR
+ NL_RTM_GETADDR,
+#define NL_RTM_GETADDR NL_RTM_GETADDR
+ NL_RTM_NEWROUTE = 24,
+#define NL_RTM_NEWROUTE NL_RTM_NEWROUTE
+ NL_RTM_DELROUTE,
+#define NL_RTM_DELROUTE NL_RTM_DELROUTE
+ NL_RTM_GETROUTE,
+#define NL_RTM_GETROUTE NL_RTM_GETROUTE
+ NL_RTM_NEWNEIGH = 28,
+#define NL_RTM_NEWNEIGH NL_RTM_NEWNEIGH
+ NL_RTM_DELNEIGH,
+#define NL_RTM_DELNEIGH NL_RTM_DELNEIGH
+ NL_RTM_GETNEIGH,
+#define NL_RTM_GETNEIGH NL_RTM_GETNEIGH
+ NL_RTM_NEWRULE = 32,
+#define NL_RTM_NEWRULE NL_RTM_NEWRULE
+ NL_RTM_DELRULE,
+#define NL_RTM_DELRULE NL_RTM_DELRULE
+ NL_RTM_GETRULE,
+#define NL_RTM_GETRULE NL_RTM_GETRULE
+ NL_RTM_NEWQDISC = 36,
+#define NL_RTM_NEWQDISC NL_RTM_NEWQDISC
+ NL_RTM_DELQDISC,
+#define NL_RTM_DELQDISC NL_RTM_DELQDISC
+ NL_RTM_GETQDISC,
+#define NL_RTM_GETQDISC NL_RTM_GETQDISC
+ NL_RTM_NEWTCLASS = 40,
+#define NL_RTM_NEWTCLASS NL_RTM_NEWTCLASS
+ NL_RTM_DELTCLASS,
+#define NL_RTM_DELTCLASS NL_RTM_DELTCLASS
+ NL_RTM_GETTCLASS,
+#define NL_RTM_GETTCLASS NL_RTM_GETTCLASS
+ NL_RTM_NEWTFILTER = 44,
+#define NL_RTM_NEWTFILTER NL_RTM_NEWTFILTER
+ NL_RTM_DELTFILTER,
+#define NL_RTM_DELTFILTER NL_RTM_DELTFILTER
+ NL_RTM_GETTFILTER,
+#define NL_RTM_GETTFILTER NL_RTM_GETTFILTER
+ NL_RTM_NEWACTION = 48,
+#define NL_RTM_NEWACTION NL_RTM_NEWACTION
+ NL_RTM_DELACTION,
+#define NL_RTM_DELACTION NL_RTM_DELACTION
+ NL_RTM_GETACTION,
+#define NL_RTM_GETACTION NL_RTM_GETACTION
+ NL_RTM_NEWPREFIX = 52,
+#define NL_RTM_NEWPREFIX NL_RTM_NEWPREFIX
+ NL_RTM_GETMULTICAST = 58,
+#define NL_RTM_GETMULTICAST NL_RTM_GETMULTICAST
+ NL_RTM_GETANYCAST = 62,
+#define NL_RTM_GETANYCAST NL_RTM_GETANYCAST
+ NL_RTM_NEWNEIGHTBL = 64,
+#define NL_RTM_NEWNEIGHTBL NL_RTM_NEWNEIGHTBL
+ NL_RTM_GETNEIGHTBL = 66,
+#define NL_RTM_GETNEIGHTBL NL_RTM_GETNEIGHTBL
+ NL_RTM_SETNEIGHTBL,
+#define NL_RTM_SETNEIGHTBL NL_RTM_SETNEIGHTBL
+ NL_RTM_NEWNDUSEROPT = 68,
+#define NL_RTM_NEWNDUSEROPT NL_RTM_NEWNDUSEROPT
+ NL_RTM_NEWADDRLABEL = 72,
+#define NL_RTM_NEWADDRLABEL NL_RTM_NEWADDRLABEL
+ NL_RTM_DELADDRLABEL,
+#define NL_RTM_DELADDRLABEL NL_RTM_DELADDRLABEL
+ NL_RTM_GETADDRLABEL,
+#define NL_RTM_GETADDRLABEL NL_RTM_GETADDRLABEL
+ NL_RTM_GETDCB = 78,
+#define NL_RTM_GETDCB NL_RTM_GETDCB
+ NL_RTM_SETDCB,
+#define NL_RTM_SETDCB NL_RTM_SETDCB
+ NL_RTM_NEWNETCONF = 80,
+#define NL_RTM_NEWNETCONF NL_RTM_NEWNETCONF
+ NL_RTM_GETNETCONF = 82,
+#define NL_RTM_GETNETCONF NL_RTM_GETNETCONF
+ NL_RTM_NEWMDB = 84,
+#define NL_RTM_NEWMDB NL_RTM_NEWMDB
+ NL_RTM_DELMDB = 85,
+#define NL_RTM_DELMDB NL_RTM_DELMDB
+ NL_RTM_GETMDB = 86,
+#define NL_RTM_GETMDB NL_RTM_GETMDB
+ NL_RTM_NEWNSID = 88,
+#define NL_RTM_NEWNSID NL_RTM_NEWNSID
+ NL_RTM_DELNSID = 89,
+#define NL_RTM_DELNSID NL_RTM_DELNSID
+ NL_RTM_GETNSID = 90,
+#define NL_RTM_GETNSID NL_RTM_GETNSID
+ NL_RTM_NEWSTATS = 92,
+#define NL_RTM_NEWSTATS NL_RTM_NEWSTATS
+ NL_RTM_GETSTATS = 94,
+#define NL_RTM_GETSTATS NL_RTM_GETSTATS
+ NL_RTM_NEWNEXTHOP = 104,
+#define NL_RTM_NEWNEXTHOP NL_RTM_NEWNEXTHOP
+ NL_RTM_DELNEXTHOP,
+#define NL_RTM_DELNEXTHOP NL_RTM_DELNEXTHOP
+ NL_RTM_GETNEXTHOP,
+#define NL_RTM_GETNEXTHOP NL_RTM_GETNEXTHOP
+ __NL_RTM_MAX, /* one past the last message type defined above */
+};
+#define NL_RTM_MAX (((__NL_RTM_MAX + 3) & ~3) - 1) /* __NL_RTM_MAX rounded up to a multiple of 4, minus 1 */
+
+#ifndef _KERNEL
+/*
+ * The RTM_* namespace clashes with the BSD rtsock namespace.
+ * The kernel therefore uses the NL_RTM_ prefix; userland gets the
+ * RTM_ names below, mapped onto the NL_RTM_ ones.
+ */
+#define RTM_BASE NL_RTM_BASE
+#define RTM_NEWLINK NL_RTM_NEWLINK
+#define RTM_DELLINK NL_RTM_DELLINK
+#define RTM_GETLINK NL_RTM_GETLINK
+#define RTM_SETLINK NL_RTM_SETLINK
+#define RTM_NEWADDR NL_RTM_NEWADDR
+#define RTM_DELADDR NL_RTM_DELADDR
+#define RTM_GETADDR NL_RTM_GETADDR
+#define RTM_NEWROUTE NL_RTM_NEWROUTE
+#define RTM_DELROUTE NL_RTM_DELROUTE
+#define RTM_GETROUTE NL_RTM_GETROUTE
+#define RTM_NEWNEXTHOP NL_RTM_NEWNEXTHOP
+#define RTM_DELNEXTHOP NL_RTM_DELNEXTHOP
+#define RTM_GETNEXTHOP NL_RTM_GETNEXTHOP
+#endif
+
+
+/*
+ * Route-related (RTM_<NEW|DEL|GET>ROUTE) message header and attributes.
+ * Every field is a single byte except rtm_flags.
+ */
+struct rtmsg {
+ unsigned char rtm_family; /* address family */
+ unsigned char rtm_dst_len; /* Prefix length */
+ unsigned char rtm_src_len; /* Source prefix length (not used) */
+ unsigned char rtm_tos; /* Type of service (not used) */
+ unsigned char rtm_table; /* rtable id */
+ unsigned char rtm_protocol; /* Routing protocol id (RTPROT_) */
+ unsigned char rtm_scope; /* Route distance (RT_SCOPE_) */
+ unsigned char rtm_type; /* Route type (RTN_) */
+ unsigned rtm_flags; /* Route flags (RTM_F_) */
+};
+
+/*
+ * RFC 3549, 3.1.1, route type (rtm_type field).
+ */
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Unicast route */
+ RTN_LOCAL, /* Accept locally (not supported) */
+ RTN_BROADCAST, /* Accept locally as broadcast, send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast, but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop traffic towards destination */
+ RTN_UNREACHABLE,/* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table (not supported) */
+ RTN_NAT, /* Translate this address (not supported) */
+ RTN_XRESOLVE, /* Use external resolver (not supported) */
+ __RTN_MAX, /* one past the last route type */
+};
+#define RTN_MAX (__RTN_MAX - 1) /* last route type defined above */
+
+/*
+ * RFC 3549, 3.1.1, protocol (Identifies what/who added the route).
+ * Values larger than RTPROT_STATIC(4) are not interpreted by the
+ * kernel, they are just for user information.
+ */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirect */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+/* Values above RTPROT_STATIC: informational only, see the note above. */
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/*
+ * RFC 3549 3.1.1 Route scope (valid distance to destination).
+ *
+ * The values between RT_SCOPE_UNIVERSE(0) and RT_SCOPE_SITE(200)
+ * are available to the user.
+ */
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE = 0,
+ /* User defined values */
+ RT_SCOPE_SITE = 200,
+ RT_SCOPE_LINK = 253,
+ RT_SCOPE_HOST = 254,
+ RT_SCOPE_NOWHERE = 255
+};
+
+/*
+ * RFC 3549 3.1.1 Route flags (rtm_flags field of struct rtmsg).
+ */
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned (not used) */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set tableid to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed */
+
+/* Compatibility handling helpers (userland only) */
+#ifndef _KERNEL
+/* Size of the fixed rtmsg header that precedes the attributes */
+#define NL_RTM_HDRLEN ((int)sizeof(struct rtmsg))
+/*
+ * First attribute following the rtmsg header.  NL_ITEM_DATA() takes the
+ * item pointer and the header length, as in its other uses in this file.
+ */
+#define RTM_RTA(_rtm) ((struct rtattr *)NL_ITEM_DATA(_rtm, NL_RTM_HDRLEN))
+/* Payload length of the message once the rtmsg header is accounted for */
+#define RTM_PAYLOAD(_hdr) NLMSG_PAYLOAD((_hdr), NL_RTM_HDRLEN)
+#endif
+
+/*
+ * Routing table identifiers.
+ * Contrary to the Linux implementation, FreeBSD route table numbering starts
+ * from 0.  Thus, "all tables" is indicated either by omitting the RTA_TABLE
+ * attribute and keeping rtm_table=0 (compatibility), or by setting the
+ * RTA_TABLE value to RT_TABLE_UNSPEC.
+ */
+#define RT_TABLE_MAIN 0 /* RT_DEFAULT_FIB */
+#define RT_TABLE_UNSPEC 0xFFFFFFFF /* RT_ALL_FIBS */
+
+enum rtattr_type_t {
+ NL_RTA_UNSPEC,
+ NL_RTA_DST,
+ NL_RTA_SRC,
+ NL_RTA_IIF,
+ NL_RTA_OIF,
+ NL_RTA_GATEWAY,
+ NL_RTA_PRIORITY,
+ NL_RTA_PREFSRC,
+ NL_RTA_METRICS,
+ NL_RTA_MULTIPATH,
+ NL_RTA_PROTOINFO, /* not used / deprecated */
+ NL_RTA_FLOW,
+ NL_RTA_CACHEINFO, /* not used */
+ NL_RTA_SESSION, /* not used / deprecated */
+ NL_RTA_MP_ALGO, /* not used / deprecated */
+ NL_RTA_TABLE,
+ NL_RTA_MARK, /* not used */
+ NL_RTA_MFC_STATS,
+ NL_RTA_VIA,
+ NL_RTA_NEWDST,
+ NL_RTA_PREF,
+ NL_RTA_ENCAP_TYPE,
+ NL_RTA_ENCAP,
+ NL_RTA_EXPIRES,
+ NL_RTA_PAD,
+ NL_RTA_UID,
+ NL_RTA_TTL_PROPAGATE,
+ NL_RTA_IP_PROTO,
+ NL_RTA_SPORT,
+ NL_RTA_DPORT,
+ NL_RTA_NH_ID,
+ __RTA_MAX /* one past the last attribute type */
+};
+#define NL_RTA_MAX (__RTA_MAX - 1) /* last attribute type defined above */
+
+#ifndef _KERNEL
+/*
+ * The RTA_* namespace clashes with the rtsock namespace.
+ * The kernel therefore uses the NL_RTA_ prefix; userland gets the
+ * RTA_ names below, mapped onto the NL_RTA_ ones.
+ */
+#define RTA_UNSPEC NL_RTA_UNSPEC
+#define RTA_DST NL_RTA_DST
+#define RTA_SRC NL_RTA_SRC
+#define RTA_IIF NL_RTA_IIF
+#define RTA_OIF NL_RTA_OIF
+#define RTA_GATEWAY NL_RTA_GATEWAY
+#define RTA_PRIORITY NL_RTA_PRIORITY
+#define RTA_PREFSRC NL_RTA_PREFSRC
+#define RTA_METRICS NL_RTA_METRICS
+#define RTA_MULTIPATH NL_RTA_MULTIPATH
+#define RTA_PROTOINFO NL_RTA_PROTOINFO
+#define RTA_FLOW NL_RTA_FLOW
+#define RTA_CACHEINFO NL_RTA_CACHEINFO
+#define RTA_SESSION NL_RTA_SESSION
+#define RTA_MP_ALGO NL_RTA_MP_ALGO
+#define RTA_TABLE NL_RTA_TABLE
+#define RTA_MARK NL_RTA_MARK
+#define RTA_MFC_STATS NL_RTA_MFC_STATS
+#define RTA_VIA NL_RTA_VIA
+#define RTA_NEWDST NL_RTA_NEWDST
+#define RTA_PREF NL_RTA_PREF
+#define RTA_ENCAP_TYPE NL_RTA_ENCAP_TYPE
+#define RTA_ENCAP NL_RTA_ENCAP
+#define RTA_EXPIRES NL_RTA_EXPIRES
+#define RTA_PAD NL_RTA_PAD
+#define RTA_UID NL_RTA_UID
+#define RTA_TTL_PROPAGATE NL_RTA_TTL_PROPAGATE
+#define RTA_IP_PROTO NL_RTA_IP_PROTO
+#define RTA_SPORT NL_RTA_SPORT
+#define RTA_DPORT NL_RTA_DPORT
+#define RTA_NH_ID NL_RTA_NH_ID
+#define RTA_MAX NL_RTA_MAX
+#endif
+
+/*
+ * Route attribute header.  The attribute payload follows it immediately.
+ */
+struct rtattr {
+ unsigned short rta_len; /* length including this header (see NL_RTA_DATA_LEN) */
+ unsigned short rta_type; /* attribute type; presumably one of enum rtattr_type_t - TODO confirm */
+};
+
+#define NL_RTA_ALIGN_SIZE NL_ITEM_ALIGN_SIZE /* attributes share the generic item alignment */
+#define NL_RTA_ALIGN NL_ITEM_ALIGN
+#define NL_RTA_HDRLEN ((int)sizeof(struct rtattr)) /* size of the attribute header */
+#define NL_RTA_DATA_LEN(_rta) ((int)((_rta)->rta_len - NL_RTA_HDRLEN)) /* payload length: rta_len minus the header */
+#define NL_RTA_DATA(_rta) NL_ITEM_DATA(_rta, NL_RTA_HDRLEN) /* payload located NL_RTA_HDRLEN past the header start */
+#define NL_RTA_DATA_CONST(_rta) NL_ITEM_DATA_CONST(_rta, NL_RTA_HDRLEN) /* same, through NL_ITEM_DATA_CONST */
+
+/* Compatibility attribute handling helpers (userland only) */
+#ifndef _KERNEL
+#define RTA_ALIGNTO NL_RTA_ALIGN_SIZE
+#define RTA_ALIGN(_len) NL_RTA_ALIGN(_len)
+/* Raw and aligned total length of an attribute, header included */
+#define _RTA_LEN(_rta) ((int)(_rta)->rta_len)
+#define _RTA_ALIGNED_LEN(_rta) RTA_ALIGN(_RTA_LEN(_rta))
+/* Attribute validation and iteration, built on the generic item helpers */
+#define RTA_OK(_rta, _len) NL_ITEM_OK(_rta, _len, NL_RTA_HDRLEN, _RTA_LEN)
+#define RTA_NEXT(_rta, _len) NL_ITEM_ITER(_rta, _len, _RTA_ALIGNED_LEN)
+/* Unaligned and aligned attribute size for a payload of _len bytes */
+#define RTA_LENGTH(_len) (NL_RTA_HDRLEN + (_len))
+#define RTA_SPACE(_len) RTA_ALIGN(RTA_LENGTH(_len))
+#define RTA_DATA(_rta) NL_RTA_DATA(_rta)
+/* Payload length; the original definition was missing its closing parenthesis */
+#define RTA_PAYLOAD(_rta) ((int)(_RTA_LEN(_rta) - NL_RTA_HDRLEN))
+#endif
+
+/* RTA attribute headers */
+
+/* RTA_VIA: address family followed by the raw address bytes */
+struct rtvia {
+ sa_family_t rtvia_family; /* family of the address that follows */
+ uint8_t rtvia_addr[0]; /* zero-length trailing array; address length is not stored here */
+};
+
+
+/*
+ * RTA_METRICS is a nested attribute that consists of an array of
+ * 'struct rtattr' with the types defined below. Most of the values are
+ * uint32_t.
+ */
+ enum {
+ NL_RTAX_UNSPEC,
+#define NL_RTAX_UNSPEC NL_RTAX_UNSPEC
+ NL_RTAX_LOCK,
+#define NL_RTAX_LOCK NL_RTAX_LOCK
+ NL_RTAX_MTU,
+#define NL_RTAX_MTU NL_RTAX_MTU
+ NL_RTAX_WINDOW,
+#define NL_RTAX_WINDOW NL_RTAX_WINDOW
+ NL_RTAX_RTT,
+#define NL_RTAX_RTT NL_RTAX_RTT
+ NL_RTAX_RTTVAR,
+#define NL_RTAX_RTTVAR NL_RTAX_RTTVAR
+ NL_RTAX_SSTHRESH,
+#define NL_RTAX_SSTHRESH NL_RTAX_SSTHRESH
+ NL_RTAX_CWND,
+#define NL_RTAX_CWND NL_RTAX_CWND
+ NL_RTAX_ADVMSS,
+#define NL_RTAX_ADVMSS NL_RTAX_ADVMSS
+ NL_RTAX_REORDERING,
+#define NL_RTAX_REORDERING NL_RTAX_REORDERING
+ NL_RTAX_HOPLIMIT,
+#define NL_RTAX_HOPLIMIT NL_RTAX_HOPLIMIT
+ NL_RTAX_INITCWND,
+#define NL_RTAX_INITCWND NL_RTAX_INITCWND
+ NL_RTAX_FEATURES,
+#define NL_RTAX_FEATURES NL_RTAX_FEATURES
+ NL_RTAX_RTO_MIN,
+#define NL_RTAX_RTO_MIN NL_RTAX_RTO_MIN
+ NL_RTAX_INITRWND,
+#define NL_RTAX_INITRWND NL_RTAX_INITRWND
+ NL_RTAX_QUICKACK,
+#define NL_RTAX_QUICKACK NL_RTAX_QUICKACK
+ NL_RTAX_CC_ALGO,
+#define NL_RTAX_CC_ALGO NL_RTAX_CC_ALGO
+ NL_RTAX_FASTOPEN_NO_COOKIE,
+#define NL_RTAX_FASTOPEN_NO_COOKIE NL_RTAX_FASTOPEN_NO_COOKIE
+ __NL_RTAX_MAX /* one past the last metric type */
+};
+#define NL_RTAX_MAX (__NL_RTAX_MAX - 1) /* last metric type defined above */
+
+#define RTAX_FEATURE_ECN (1 << 0) /* feature bits; presumably carried in NL_RTAX_FEATURES - TODO confirm */
+#define RTAX_FEATURE_SACK (1 << 1)
+#define RTAX_FEATURE_TIMESTAMP (1 << 2)
+#define RTAX_FEATURE_ALLFRAG (1 << 3)
+/* Union of all RTAX_FEATURE_* bits defined above */
+#define RTAX_FEATURE_MASK \
+ (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG)
+
+#ifndef _KERNEL
+/*
+ * The RTAX_* namespace clashes with the rtsock namespace.
+ * The kernel therefore uses the NL_RTAX_ prefix; userland gets the
+ * RTAX_ names below, mapped onto the NL_RTAX_ ones.
+ */
+#define RTAX_UNSPEC NL_RTAX_UNSPEC
+#define RTAX_LOCK NL_RTAX_LOCK
+#define RTAX_MTU NL_RTAX_MTU
+#define RTAX_WINDOW NL_RTAX_WINDOW
+#define RTAX_RTT NL_RTAX_RTT
+#define RTAX_RTTVAR NL_RTAX_RTTVAR
+#define RTAX_SSTHRESH NL_RTAX_SSTHRESH
+#define RTAX_CWND NL_RTAX_CWND
+#define RTAX_ADVMSS NL_RTAX_ADVMSS
+#define RTAX_REORDERING NL_RTAX_REORDERING
+#define RTAX_HOPLIMIT NL_RTAX_HOPLIMIT
+#define RTAX_INITCWND NL_RTAX_INITCWND
+#define RTAX_FEATURES NL_RTAX_FEATURES
+#define RTAX_RTO_MIN NL_RTAX_RTO_MIN
+#define RTAX_INITRWND NL_RTAX_INITRWND
+#define RTAX_QUICKACK NL_RTAX_QUICKACK
+#define RTAX_CC_ALGO NL_RTAX_CC_ALGO
+#define RTAX_FASTOPEN_NO_COOKIE NL_RTAX_FASTOPEN_NO_COOKIE
+#endif
+
+/*
+ * RTA_MULTIPATH consists of an array of rtnexthop structures.
+ */
+struct rtnexthop {
+ unsigned short rtnh_len; /* entry length as read by RTNH_OK() */
+ unsigned char rtnh_flags; /* RTNH_F_* flags */
+ unsigned char rtnh_hops;
+ int rtnh_ifindex; /* presumably the nexthop interface index - TODO confirm */
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 0x01 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 0x02 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 0x04 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 0x08 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 0x10 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 0x20 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 0x40 /* Nexthop is trapping packets */
+/* Subset of the flags above: DEAD, LINKDOWN, OFFLOAD and TRAP */
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle nexthops */
+
+#define RTNH_ALIGNTO NL_ITEM_ALIGN_SIZE
+#define RTNH_ALIGN(_len) NL_ITEM_ALIGN(_len)
+#define RTNH_HDRLEN ((int)sizeof(struct rtnexthop))
+#define _RTNH_LEN(_nh) ((int)(_nh)->rtnh_len)
+#define _RTNH_ALIGNED_LEN(_nh) RTNH_ALIGN(_RTNH_LEN(_nh))
+#define RTNH_OK(_nh, _len) NL_ITEM_OK(_nh, _len, RTNH_HDRLEN, _RTNH_LEN)
+// NOTE(review): RTNH_NEXT is not provided; two disabled drafts follow.
+//#define RTNH_NEXT(_nh) NL_ITEM_NEXT(_nh, _RTNH_ALIGNED_LEN(_nh))
+#define RTNH_LENGTH(_len) (RTNH_HDRLEN + (_len))
+#define RTNH_SPACE(_len) RTNH_ALIGN(RTNH_LENGTH(_len))
+#define RTNH_DATA(_nh) ((struct rtattr *)NL_ITEM_DATA(_nh, RTNH_HDRLEN))
+
+
+struct rtgenmsg {
+ unsigned char rtgen_family; /* presumably an address family (AF_*) - TODO confirm */
+};
+
+
+/*
+ * NEXTHOP-related (RTM_<NEW|DEL|GET>NEXTHOP) message header and attributes.
+ */
+
+struct nhmsg {
+ unsigned char nh_family; /* presumably an address family (AF_*) - TODO confirm */
+ unsigned char nh_scope; /* ignored on RX, filled by kernel */
+ unsigned char nh_protocol; /* Routing protocol that installed nh */
+ unsigned char resvd; /* reserved */
+ unsigned int nh_flags; /* RTNH_F_* flags */
+};
+
+/* entry in a nexthop group (element of the NHA_GROUP array) */
+struct nexthop_grp {
+ uint32_t id; /* nexthop userland index */
+ uint8_t weight; /* weight of this nexthop */
+ uint8_t resvd1; /* reserved */
+ uint16_t resvd2; /* reserved */
+};
+
+enum {
+ NEXTHOP_GRP_TYPE_MPATH, /* default nexthop group */
+ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */
+ __NEXTHOP_GRP_TYPE_MAX, /* one past the last group type */
+};
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) /* last group type defined above */
+
+enum {
+ NHA_UNSPEC,
+ NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
+
+ NHA_GROUP, /* array of nexthop_grp */
+ NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */
+ /* if NHA_GROUP attribute is added, no other attributes can be set */
+
+ NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */
+ /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */
+
+ NHA_OIF, /* u32; nexthop device */
+ NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */
+ NHA_ENCAP_TYPE, /* u16; lwt encap type */
+ NHA_ENCAP, /* lwt encap data */
+
+ /* NHA_OIF can be appended to dump request to return only
+ * nexthops using given device
+ */
+ NHA_GROUPS, /* flag; only return nexthop groups in dump */
+ NHA_MASTER, /* u32; only return nexthops with given master dev */
+
+ NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
+ /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+
+ /* nested; resilient nexthop group attributes */
+ NHA_RES_GROUP,
+ /* nested; nexthop bucket attributes */
+ NHA_RES_BUCKET,
+
+ __NHA_MAX, /* one past the last nexthop attribute */
+};
+
+#define NHA_MAX (__NHA_MAX - 1) /* last nexthop attribute defined above */
+
+enum { /* attributes nested inside NHA_RES_GROUP */
+ NHA_RES_GROUP_UNSPEC,
+ /* Pad attribute for 64-bit alignment. */
+ NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+ /* u16; number of nexthop buckets in a resilient nexthop group */
+ NHA_RES_GROUP_BUCKETS,
+ /* clock_t as u32; nexthop bucket idle timer (per-group) */
+ NHA_RES_GROUP_IDLE_TIMER,
+ /* clock_t as u32; nexthop unbalanced timer */
+ NHA_RES_GROUP_UNBALANCED_TIMER,
+ /* clock_t as u64; nexthop unbalanced time */
+ NHA_RES_GROUP_UNBALANCED_TIME,
+ __NHA_RES_GROUP_MAX, /* sentinel: one past the last attribute */
+};
+#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) /* highest valid value */
+
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family; /* Family; exact meaning not stated here -- TODO confirm (likely AF_*) */
+ unsigned char __ifi_pad; /* padding */
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+#ifndef _KERNEL
+/* Compatibility helpers (only visible outside the kernel) */
+#define _IFINFO_HDRLEN ((int)sizeof(struct ifinfomsg))
+#define IFLA_RTA(_ifi) ((struct rtattr *)NL_ITEM_DATA(_ifi, _IFINFO_HDRLEN)) /* first rtattr after the ifinfomsg header */
+#define IFLA_PAYLOAD(_ifi) NLMSG_PAYLOAD(_ifi, _IFINFO_HDRLEN)
+#endif
+
+enum { /* IFLA_* attribute types; IFLA_RTA() above locates them after struct ifinfomsg */
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST /* self-define; presumably lets users test with #ifdef */
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
+ IFLA_PROP_LIST,
+ IFLA_ALT_IFNAME, /* Alternative ifname */
+ IFLA_PERM_ADDRESS,
+ IFLA_PROTO_DOWN_REASON,
+ __IFLA_MAX }; /* sentinel: one past the last attribute */
+
+#define IFLA_MAX (__IFLA_MAX - 1) /* highest valid IFLA_* value */
+
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg { /* header for prefix information messages */
+ unsigned char prefix_family;
+ unsigned char prefix_pad1; /* padding */
+ unsigned short prefix_pad2; /* padding */
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3; /* padding */
+};
+
+enum { PREFIX_UNSPEC, PREFIX_ADDRESS, PREFIX_CACHEINFO, __PREFIX_MAX }; /* __PREFIX_MAX is a sentinel */
+
+#define PREFIX_MAX (__PREFIX_MAX - 1) /* highest valid PREFIX_* value */
+
+struct prefix_cacheinfo { /* presumably the payload of PREFIX_CACHEINFO -- TODO confirm */
+ uint32_t preferred_time;
+ uint32_t valid_time;
+};
+
+#ifndef _KERNEL
+/*
+ * RTnetlink multicast groups - backwards compatibility for userspace.
+ * Each value is a bit mask equal to 1 << (RTNLGRP_<name> - 1) for the
+ * matching entry of enum rtnetlink_groups.
+ */
+#define RTMGRP_LINK 0x01
+#define RTMGRP_NOTIFY 0x02
+#define RTMGRP_NEIGH 0x04
+#define RTMGRP_TC 0x08
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups (group numbers, not bit masks) */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2, /* presumably a placeholder that keeps the numbering -- TODO confirm */
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4, /* presumably a placeholder that keeps the numbering -- TODO confirm */
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ __RTNLGRP_MAX /* sentinel: one past the last group */
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* highest valid group number */
+
+// START OF IF_ADDR SECTION
+
+struct ifaddrmsg {
+ uint8_t ifa_family; /* Address family */
+ uint8_t ifa_prefixlen; /* Prefix length */
+ uint8_t ifa_flags; /* Address-specific flags (IFA_F_*, low 8 bits only) */
+ uint8_t ifa_scope; /* Address scope */
+ uint32_t ifa_index; /* Link ifindex */
+};
+
+#ifndef _KERNEL
+#define _NL_IFA_HDRLEN ((int)sizeof(struct ifaddrmsg))
+#define IFA_RTA(_ifa) ((struct rtattr *)(NL_ITEM_DATA(_ifa, _NL_IFA_HDRLEN))) /* first rtattr after the ifaddrmsg header */
+#define IFA_PAYLOAD(_hdr) NLMSG_PAYLOAD(_hdr, _NL_IFA_HDRLEN)
+#endif
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ __IFA_MAX, /* sentinel: one past the last attribute */
+};
+#define IFA_MAX (__IFA_MAX - 1) /* highest valid IFA_* value */
+
+/*
+ * ifa_flags.  Values above 0xff do not fit the u8 ifa_flags field of
+ * struct ifaddrmsg and need the u32 IFA_FLAGS attribute.
+ */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY /* alias: same bit as IFA_F_SECONDARY */
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+/* */
+
+
+struct ifa_cacheinfo { /* presumably the payload of IFA_CACHEINFO -- TODO confirm */
+ uint32_t ifa_prefered; /* (sic) field name is part of the interface */
+ uint32_t ifa_valid;
+ uint32_t cstamp; /* created timestamp, hundredths of seconds */
+ uint32_t tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+// END OF IF_ADDR SECTION
+
+#endif
Index: sys/netlink/netlink_route.c
===================================================================
--- /dev/null
+++ sys/netlink/netlink_route.c
@@ -0,0 +1,966 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_route.h"
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/rmlock.h>
+#include <sys/socket.h>
+#include <sys/ck.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+#include <net/route/route_ctl.h>
+#include <net/route/route_var.h>
+#include <netlink/netlink.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_message.h>
+#include <netlink/netlink_route.h>
+
+#define DEBUG_MOD_NAME nl_route
+#define DEBUG_MAX_LEVEL LOG_DEBUG3
+#include <net/route/route_debug.h>
+_DECLARE_DEBUG(LOG_DEBUG3);
+
+
+#if defined(INET6) || defined(INET)
+
+#endif
+
+/*
+ * Maps the nexthop flags to an rtnetlink route type (RTN_*).
+ * Blackhole wins over reject; everything else is reported as unicast.
+ */
+static unsigned char
+get_rtm_type(const struct nhop_object *nh)
+{
+	const int flags = nh->nh_flags;
+
+	/* Use the fact that nhg runtime flags are only NHF_MULTIPATH */
+	if ((flags & NHF_BLACKHOLE) != 0)
+		return (RTN_BLACKHOLE);
+	if ((flags & NHF_REJECT) != 0)
+		return (RTN_PROHIBIT);
+	return (RTN_UNICAST);
+}
+
+/*
+ * Derives the rtnetlink protocol id (RTPROT_*) from the route flags of
+ * the nexthop.  For a nexthop group the first member is inspected.
+ */
+static unsigned char
+get_rtm_protocol(const struct nhop_object *nh)
+{
+	int rtflags;
+
+	if (NH_IS_NHGRP(nh))
+		nh = ((const struct nhgrp_object *)nh)->nhops[0];
+
+	rtflags = nhop_get_rtflags(nh);
+	if ((rtflags & RTF_PROTO1) != 0)
+		return (RTPROT_ZEBRA);
+	if ((rtflags & RTF_STATIC) != 0)
+		return (RTPROT_STATIC);
+	return (RTPROT_KERNEL);
+}
+
+/*
+ * Translates a routing socket command (RTM_*) into the netlink message
+ * type used to report it.  Unknown commands yield 0.
+ */
+static int
+get_rtmsg_type_from_rtsock(int cmd)
+{
+	if (cmd == RTM_ADD || cmd == RTM_CHANGE || cmd == RTM_GET)
+		return (NL_RTM_NEWROUTE);
+	if (cmd == RTM_DELETE)
+		return (NL_RTM_DELROUTE);
+
+	return (0);
+}
+
+/*
+ * Builds an AF_INET sockaddr from the raw 4-byte address at rta_data.
+ * Storage comes from npt; on allocation failure *perror is set to
+ * ENOBUFS and NULL is returned.
+ */
+static struct sockaddr *
+parse_rta_ip4(void *rta_data, struct netlink_parse_tracker *npt, int *perror)
+{
+	struct sockaddr_in *sa4;
+
+	sa4 = (struct sockaddr_in *)npt_alloc_sockaddr(npt, sizeof(*sa4));
+	if (__predict_false(sa4 == NULL)) {
+		*perror = ENOBUFS;
+		return (NULL);
+	}
+
+	sa4->sin_family = AF_INET;
+	sa4->sin_len = sizeof(*sa4);
+	memcpy(&sa4->sin_addr, rta_data, sizeof(sa4->sin_addr));
+
+	return ((struct sockaddr *)sa4);
+}
+
+/*
+ * Returns an AF_INET sockaddr holding the netmask for prefix length plen.
+ *
+ * plen must be within 0..32, otherwise *perror is set to EINVAL and NULL
+ * is returned.  Allocation failures are reported by parse_rta_ip4().
+ */
+static struct sockaddr *
+get_ip4_netmask(uint8_t plen, struct netlink_parse_tracker *npt, int *perror)
+{
+	struct in_addr mask;
+
+	if (__predict_false(plen > 32)) {
+		*perror = EINVAL;
+		return (NULL);
+	}
+
+	/*
+	 * Shift an unsigned constant: "1 << 31" on a signed int (plen == 1)
+	 * is undefined behaviour.  plen == 0 is special-cased because a
+	 * shift by the full width is undefined as well.
+	 */
+	mask.s_addr = htonl(plen != 0 ? (0xFFFFFFFFU << (32 - plen)) : 0);
+	return (parse_rta_ip4(&mask, npt, perror));
+}
+
+/*
+ * Builds an AF_INET6 sockaddr from the raw 16-byte address at rta_data.
+ * Storage comes from npt; on allocation failure *perror is set to
+ * ENOBUFS and NULL is returned.
+ */
+static struct sockaddr *
+parse_rta_ip6(void *rta_data, struct netlink_parse_tracker *npt, int *perror)
+{
+	struct sockaddr_in6 *sin6;
+
+	sin6 = (struct sockaddr_in6 *)npt_alloc_sockaddr(npt, sizeof(struct sockaddr_in6));
+	if (__predict_false(sin6 == NULL)) {
+		*perror = ENOBUFS;
+		return (NULL);
+	}
+	sin6->sin6_len = sizeof(struct sockaddr_in6);
+	sin6->sin6_family = AF_INET6;
+	/* Copy the whole IPv6 address, not just sizeof(struct in_addr) bytes */
+	memcpy(&sin6->sin6_addr, rta_data, sizeof(struct in6_addr));
+	return ((struct sockaddr *)sin6);
+}
+
+/*
+ * Fills addr6 with the netmask that corresponds to a prefix length.
+ *
+ * All four 32-bit words are written: leading words are set to all ones,
+ * the partial word (if any) gets its high bits set and the remaining
+ * words are cleared, so the caller does not need to pre-zero addr6.
+ * A mask above 128 simply yields an all-ones address instead of writing
+ * past the end of the structure.
+ */
+static void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+	uint32_t *words = (uint32_t *)addr6;
+
+	for (int i = 0; i < 4; i++) {
+		if (mask >= 32) {
+			words[i] = 0xFFFFFFFF;
+			mask -= 32;
+		} else if (mask > 0) {
+			/* unsigned shift: "1 << 31" on an int is undefined */
+			words[i] = htonl(0xFFFFFFFFU << (32 - mask));
+			mask = 0;
+		} else
+			words[i] = 0;
+	}
+}
+
+/*
+ * Returns an AF_INET6 sockaddr holding the netmask for prefix length plen.
+ *
+ * plen must be within 0..128, otherwise *perror is set to EINVAL and NULL
+ * is returned.  Allocation failures are reported by parse_rta_ip6().
+ */
+static struct sockaddr *
+get_ip6_netmask(uint8_t plen, struct netlink_parse_tracker *npt, int *perror)
+{
+	/*
+	 * Start from an all-zero mask: ipv6_writemask() is only guaranteed
+	 * to fill in the words covered by the prefix, so the tail must not
+	 * be left as uninitialised stack contents.
+	 */
+	struct in6_addr mask6 = {};
+
+	if (__predict_false(plen > 128)) {
+		*perror = EINVAL;
+		return (NULL);
+	}
+	ipv6_writemask(&mask6, plen);
+
+	return (parse_rta_ip6(&mask6, npt, perror));
+}
+
+/*
+ * Parses an address attribute whose family is implied by its payload
+ * size: 4 bytes means IPv4, 16 bytes means IPv6.  Any other size sets
+ * *perror to ENOTSUP and returns NULL.
+ */
+static struct sockaddr *
+parse_rta_ip(struct rtattr *rta, struct netlink_parse_tracker *npt, int *perror)
+{
+	void *payload = NL_RTA_DATA(rta);
+	int payload_len = NL_RTA_DATA_LEN(rta);
+
+	switch (payload_len) {
+	case sizeof(struct in_addr):
+		return (parse_rta_ip4(payload, npt, perror));
+	case sizeof(struct in6_addr):
+		return (parse_rta_ip6(payload, npt, perror));
+	default:
+		break;
+	}
+
+	RT_LOG(LOG_NOTICE, "unknown IP len: %d for rta type %d",
+	    payload_len, rta->rta_type);
+	*perror = ENOTSUP;
+	return (NULL);
+}
+
+/*
+ * Parses an NL_RTA_VIA attribute (struct rtvia: family + raw address)
+ * into a sockaddr allocated from npt.
+ *
+ * Returns NULL with *perror set to EINVAL when the payload is too short
+ * for the announced family, or ENOTSUP for families other than
+ * AF_INET/AF_INET6.
+ */
+static struct sockaddr *
+parse_rta_via(struct rtattr *rta, struct netlink_parse_tracker *npt, int *perror)
+{
+	struct rtvia *via = NL_RTA_DATA(rta);
+	int data_len = NL_RTA_DATA_LEN(rta);
+
+	/*
+	 * Compare as signed ints so that a negative length cannot slip
+	 * through after promotion to size_t.
+	 */
+	if (__predict_false(data_len < (int)sizeof(struct rtvia))) {
+		*perror = EINVAL;
+		return (NULL);
+	}
+	data_len -= offsetof(struct rtvia, rtvia_addr);
+
+	switch (via->rtvia_family) {
+	case AF_INET:
+		if (__predict_false(data_len < (int)sizeof(struct in_addr))) {
+			*perror = EINVAL;
+			return (NULL);
+		}
+		return (parse_rta_ip4(via->rtvia_addr, npt, perror));
+	case AF_INET6:
+		if (__predict_false(data_len < (int)sizeof(struct in6_addr))) {
+			*perror = EINVAL;
+			return (NULL);
+		}
+		return (parse_rta_ip6(via->rtvia_addr, npt, perror));
+	default:
+		*perror = ENOTSUP;
+		return (NULL);
+	}
+}
+
+/*
+ * Reads a 32-bit attribute payload.
+ * *perror is always written: 0 on success, EINVAL (with a return value
+ * of 0) when the payload is not exactly four bytes long.
+ */
+static uint32_t
+nl_rta_get_uint32(const struct rtattr *rta, int *perror)
+{
+	const bool size_ok = (NL_RTA_DATA_LEN(rta) == sizeof(uint32_t));
+
+	if (__predict_false(!size_ok)) {
+		RT_LOG(LOG_DEBUG2, "nla type %d size(%u) is not uint32",
+		    rta->rta_type, NL_RTA_DATA_LEN(rta));
+		*perror = EINVAL;
+		return (0);
+	}
+
+	*perror = 0;
+	return (*((const uint32_t *)NL_RTA_DATA_CONST(rta)));
+}
+
+/*
+ * Resolves an interface-index attribute to an ifnet pointer.
+ * Must run inside the network epoch.  Returns NULL when the attribute
+ * is malformed (*perror set by nl_rta_get_uint32()); otherwise returns
+ * whatever ifnet_byindex() yields for the index.
+ */
+static struct ifnet *
+parse_rta_oif(const struct rtattr *rta, struct netlink_parse_tracker *npt, int *perror)
+{
+	uint32_t idx;
+
+	idx = nl_rta_get_uint32(rta, perror);
+
+	NET_EPOCH_ASSERT();
+
+	return (__predict_false(*perror != 0) ? NULL : ifnet_byindex(idx));
+}
+
+
+/*
+ * fibnum heuristics
+ *
+ * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS
+ * msg rtm_table RTA_TABLE result
+ * RTM_GETROUTE/dump 0 - RT_ALL_FIBS
+ * RTM_GETROUTE/dump 1 - 1
+ * RTM_GETROUTE/get 0 - 0
+ *
+ */
+
+/*
+ * Converts rtmsg message into rt_addrinfo.
+ *
+ * rtm points at the rtmsg header and len is the number of bytes available
+ * from there (header included).  The attributes following the header are
+ * walked and the recognised ones (DST, GATEWAY, VIA, OIF, TABLE) are
+ * recorded in info; sockaddrs are allocated from npt.  required_mask is
+ * the set of RTA_* bits that must have been supplied.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+parse_rtmsg_nlattr(struct rtmsg *rtm, int len, struct rt_addrinfo *info,
+    uint16_t required_mask, struct netlink_parse_tracker *npt)
+{
+	struct sockaddr *sa;
+	int error = 0;
+	struct nlattr *nla, *nla_head;
+
+	len -= NETLINK_ALIGN(sizeof(struct rtmsg));
+	nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
+
+	RT_LOG(LOG_DEBUG, "parse %p remaining_len %d", nla_head, len);
+
+	info->rti_fibnum = rtm->rtm_table;
+	info->rti_family = rtm->rtm_family;
+
+	/* Valid fib numbers are 0 .. V_rt_numfibs - 1 */
+	if (info->rti_fibnum >= V_rt_numfibs) {
+		RT_LOG(LOG_DEBUG, "incorrect fibnum: %u", info->rti_fibnum);
+		return (EINVAL);
+	}
+
+	NLA_FOREACH(nla, nla_head, len) {
+		struct rtattr *rta = (struct rtattr *)nla;
+		if (rta->rta_len < sizeof(struct rtattr)) {
+			RT_LOG(LOG_NOTICE, "invalid length for attribute %d, stopping processing",
+			    rta->rta_type);
+			break;
+		}
+		RT_LOG(LOG_DEBUG2, "parse rta %d len %d", rta->rta_type, rta->rta_len);
+
+		switch (rta->rta_type) {
+		case NL_RTA_DST:
+			sa = parse_rta_ip(rta, npt, &error);
+			if (sa != NULL) {
+				info->rti_info[RTAX_DST] = sa;
+				info->rti_addrs |= RTA_DST;
+				/* XXX: check DST af */
+			}
+			break;
+		case NL_RTA_GATEWAY:
+			sa = parse_rta_ip(rta, npt, &error);
+			if (sa != NULL) {
+				info->rti_info[RTAX_GATEWAY] = sa;
+				info->rti_addrs |= RTA_GATEWAY;
+				info->rti_flags |= RTF_GATEWAY;
+			}
+			break;
+		case NL_RTA_VIA:
+			sa = parse_rta_via(rta, npt, &error);
+			/* Only record the gateway when parsing succeeded */
+			if (sa != NULL) {
+				info->rti_info[RTAX_GATEWAY] = sa;
+				info->rti_addrs |= RTA_GATEWAY;
+				info->rti_flags |= RTF_GATEWAY;
+			}
+			break;
+		case NL_RTA_OIF:
+			info->rti_ifp = parse_rta_oif(rta, npt, &error);
+			break;
+		case NL_RTA_TABLE:
+			info->rti_fibnum = nl_rta_get_uint32(rta, &error);
+			if (info->rti_fibnum >= V_rt_numfibs) {
+				RT_LOG(LOG_DEBUG, "incorrect fibnum: %u", info->rti_fibnum);
+				error = EINVAL;
+			}
+			break;
+		default:
+			RT_LOG(LOG_DEBUG, "unsupported rta_type %d", rta->rta_type);
+			break;
+		}
+		if (__predict_false(error != 0)) {
+			break;
+		}
+	}
+	/* XXX: IPv6 embedding */
+
+	if (error != 0)
+		return (error);
+
+	if ((info->rti_addrs & required_mask) != required_mask) {
+		RT_LOG(LOG_DEBUG, "required mask failed");
+		error = EINVAL;
+	}
+
+	return (error);
+}
+
+/*
+ * Completes info from the fixed rtmsg fields after attribute parsing.
+ *
+ * For AF_INET/AF_INET6 a destination prefix shorter than the address
+ * width (32 or 128 bits) gets a netmask sockaddr; a full-length prefix
+ * marks the route as RTF_HOST.  A prefix longer than the address width
+ * is rejected with EINVAL.  Other families are left untouched.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+finalize_rtmsg(struct rtmsg *rtm, int len, struct rt_addrinfo *info,
+    struct netlink_parse_tracker *npt)
+{
+	struct sockaddr *sa;
+	int error = 0;
+	uint8_t max_plen;
+
+	switch (rtm->rtm_family) {
+	case AF_INET:
+		max_plen = 32;
+		break;
+	case AF_INET6:
+		max_plen = 128;
+		break;
+	default:
+		return (0);
+	}
+
+	if (rtm->rtm_dst_len > max_plen)
+		return (EINVAL);
+	if (rtm->rtm_dst_len == max_plen) {
+		info->rti_flags |= RTF_HOST;
+		return (0);
+	}
+
+	if (rtm->rtm_family == AF_INET)
+		sa = get_ip4_netmask(rtm->rtm_dst_len, npt, &error);
+	else
+		sa = get_ip6_netmask(rtm->rtm_dst_len, npt, &error);
+	/* Do not advertise a netmask that could not be built */
+	if (sa == NULL)
+		return (error != 0 ? error : ENOBUFS);
+
+	info->rti_info[RTAX_NETMASK] = sa;
+	info->rti_addrs |= RTA_NETMASK;
+	/* XXX: embed IPv6 addrs */
+
+	return (0);
+}
+
+/*
+ * Populates an addr_info struct from an rtmsg.
+ * Parses the nl_attributes and parses the netmask.
+ *
+ * The result is stored in npt->error; on success the rt_addrinfo
+ * allocated from npt is returned, otherwise NULL.  Messages too short to
+ * hold struct rtmsg are rejected with EINVAL before the header is read,
+ * and an allocation failure is reported as ENOBUFS.
+ *
+ * NOTE(review): the parsers OR bits into info without clearing it first,
+ * so this relies on npt_alloc() returning zeroed memory -- confirm.
+ */
+static struct rt_addrinfo *
+get_info_from_rtmsg(struct nlmsghdr *hdr, uint16_t required_mask,
+    struct netlink_parse_tracker *npt)
+{
+	struct rt_addrinfo *info;
+	struct rtmsg *rtm = (struct rtmsg *)nlmsg_data(hdr);
+	int len = hdr->nlmsg_len - NLMSG_HDRLEN;
+
+	if (len < (int)sizeof(struct rtmsg)) {
+		npt->error = EINVAL;
+		return (NULL);
+	}
+
+	info = npt_alloc(npt, sizeof(struct rt_addrinfo));
+	if (info == NULL) {
+		npt->error = ENOBUFS;
+		return (NULL);
+	}
+
+	npt->error = parse_rtmsg_nlattr(rtm, len, info, required_mask, npt);
+	if (npt->error == 0)
+		npt->error = finalize_rtmsg(rtm, len, info, npt);
+
+	FIB_LOG(LOG_DEBUG2, info->rti_fibnum, info->rti_family, "errno=%d", npt->error);
+	if (npt->error == 0)
+		return (info);
+	return (NULL);
+}
+
+/*
+ * Picks the nexthop a rib command refers to: the old one for deletions,
+ * the new one for every other command.
+ */
+static struct nhop_object *
+rc_get_nhop(const struct rib_cmd_info *rc)
+{
+	if (rc->rc_cmd == RTM_DELETE)
+		return (rc->rc_nh_old);
+	return (rc->rc_nh_new);
+}
+
+/*
+ * Appends the gateway of nh to the message being built in ns.
+ *
+ * An IPv4 neighbour is written as NL_RTA_GATEWAY.  An IPv6 neighbour is
+ * written as NL_RTA_GATEWAY for IPv6 routes and as NL_RTA_VIA for IPv4
+ * routes.  Any other neighbour family (e.g. AF_LINK) adds nothing.
+ * Returns false when an attribute could not be added or the upper
+ * family is unexpected.
+ */
+static bool
+dump_rc_nhop_gw(struct nlmsg_state *ns, struct nhop_object *nh)
+{
+	const int neigh_af = nhop_get_neigh_family(nh);
+
+	if (neigh_af == AF_INET) {
+		if (!nla_put(ns, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr))
+			return (false);
+		return (true);
+	}
+
+	if (neigh_af != AF_INET6) {
+		/* onlink prefix (AF_LINK) or unknown family, skip */
+		return (true);
+	}
+
+	switch (nhop_get_upper_family(nh)) {
+	case AF_INET6:
+		if (!nla_put(ns, NL_RTA_GATEWAY, 16, &nh->gw6_sa.sin6_addr))
+			return (false);
+		break;
+	case AF_INET: {
+		/* IPv4 over IPv6 */
+		char buf[20];
+		struct rtvia *via = (struct rtvia *)&buf[0];
+
+		via->rtvia_family = AF_INET6;
+		memcpy(via->rtvia_addr, &nh->gw6_sa.sin6_addr, 16);
+		if (!nla_put(ns, NL_RTA_VIA, 17, via))
+			return (false);
+		break;
+	}
+	default:
+		/* shouldn't happen */
+		return (false);
+	}
+
+	return (true);
+}
+
+
+/*
+ * Appends the nexthop-related attributes of a rib command to the
+ * message in ns: gateway (if any), nexthop id and outgoing interface.
+ *
+ * Returns false when the command carries no nexthop or when any
+ * attribute could not be added.
+ */
+static bool
+dump_rc_nhop(struct nlmsg_state *ns, const struct rib_cmd_info *rc)
+{
+	struct nhop_object *nh;
+
+	nh = rc_get_nhop(rc);
+	/* XXX: can be raw */
+
+	if (nh == NULL)
+		return (false);
+
+	/*
+	 * IPv4 over IPv6
+	 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2),
+	 * IPv4 w/ gw
+	 * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)],
+	 * Direct route:
+	 * ('RTA_OIF', 2)
+	 */
+	/* Propagate a failure to add the gateway instead of dropping it */
+	if ((nh->nh_flags & NHF_GATEWAY) != 0 && !dump_rc_nhop_gw(ns, nh))
+		return (false);
+
+	/* Add nhop id. XXX: Switch to user nhop id */
+	if (!nla_put_u32(ns, NL_RTA_NH_ID, nhop_get_idx(nh)))
+		return (false);
+
+	/* In any case, fill outgoing interface */
+	if (!nla_put_u32(ns, NL_RTA_OIF, nh->nh_ifp->if_index))
+		return (false);
+
+	return (true);
+}
+
+/*
+ * Dumps output from a rib command into an rtmsg
+ *
+ * Builds one route message in ns: netlink header fields are taken from
+ * hdr, the rtmsg body and attributes from rc.  Must be called inside the
+ * network epoch.
+ *
+ * Returns 0 on success.  On failure the partially built message is
+ * aborted and an errno is returned: ENOMEM when the writer ran out of
+ * space, EAFNOSUPPORT for an unknown route family, EINVAL when rc has no
+ * nexthop to report.
+ */
+
+static int
+dump_rc(uint32_t fibnum, const struct nlmsghdr *hdr,
+    const struct rib_cmd_info *rc, struct nlmsg_state *ns)
+{
+	const struct nhop_object *nh = rc_get_nhop(rc);
+	struct rtmsg *rtm;
+	int error = 0;
+
+	NET_EPOCH_ASSERT();
+
+	/* The helpers below dereference nh, so bail out before using it */
+	if (nh == NULL)
+		return (EINVAL);
+
+	int payload_len = sizeof(struct rtmsg);
+	int nlmsgtype = get_rtmsg_type_from_rtsock(rc->rc_cmd);
+	if (!nlmsg_put(ns, hdr->nlmsg_pid, hdr->nlmsg_seq, nlmsgtype,
+	    hdr->nlmsg_flags, payload_len))
+		goto enomem;
+
+	int family = rt_get_family(rc->rc_rt);
+	rtm = nlmsg_reserve_object(ns, struct rtmsg);
+	rtm->rtm_family = family;
+	rtm->rtm_dst_len = 0;
+	rtm->rtm_src_len = 0;
+	rtm->rtm_tos = 0;
+	/*
+	 * rtm_table is a single byte.  Larger fib numbers are reported as 0
+	 * here; the full value is always carried in NL_RTA_TABLE below.
+	 */
+	if (fibnum < 255)
+		rtm->rtm_table = (unsigned char)fibnum;
+	else
+		rtm->rtm_table = 0;
+	rtm->rtm_protocol = get_rtm_protocol(nh);
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_type = get_rtm_type(nh);
+	rtm->rtm_flags = 0;
+
+	if (!nla_put_u32(ns, NL_RTA_TABLE, fibnum))
+		goto enomem;
+
+	int plen = 0;
+	uint32_t scopeid = 0;
+	switch (family) {
+	case AF_INET:
+		{
+			struct in_addr addr;
+			rt_get_inet_prefix_plen(rc->rc_rt, &addr, &plen, &scopeid);
+			rtm->rtm_dst_len = plen;
+			if (!nla_put(ns, NL_RTA_DST, 4, &addr))
+				goto enomem;
+			break;
+		}
+	case AF_INET6:
+		{
+			struct in6_addr addr;
+			rt_get_inet6_prefix_plen(rc->rc_rt, &addr, &plen, &scopeid);
+			rtm->rtm_dst_len = plen;
+			if (!nla_put(ns, NL_RTA_DST, 16, &addr))
+				goto enomem;
+			break;
+		}
+	default:
+		FIB_LOG(LOG_NOTICE, fibnum, family, "unknown rt family");
+		error = EAFNOSUPPORT;
+		goto flush;
+	}
+
+	if (!dump_rc_nhop(ns, rc))
+		goto enomem;
+
+	/*
+	 * TODO: export route metrics, e.g. nh->nh_mtu as NL_RTAX_MTU nested
+	 * inside NL_RTA_METRICS.
+	 */
+	nlmsg_end(ns);
+	return (0);
+enomem:
+	error = ENOMEM;
+flush:
+	nlmsg_abort(ns);
+	return (error);
+}
+
+/*
+ * Returns the rtnetlink route group (RTNLGRP_*) for an address family,
+ * or 0 when the family has no route group.
+ */
+static int
+family_to_group(int family)
+{
+	if (family == AF_INET)
+		return (RTNLGRP_IPV4_ROUTE);
+	if (family == AF_INET6)
+		return (RTNLGRP_IPV6_ROUTE);
+	return (0);
+}
+
+
+/*
+ * Reports the outcome of a route change to the group matching the
+ * family of the affected route.  Nothing is sent when no group writer
+ * can be obtained.
+ */
+static void
+report_operation(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc,
+    struct nlpcb *nlp, struct nlmsghdr *hdr)
+{
+	struct nlmsg_state writer;
+	const uint32_t group = family_to_group(rt_get_family(rc->rc_rt));
+
+	if (!nlmsg_get_group_writer(NLMSG_SMALL, group, &writer))
+		return;
+
+	dump_rc(fibnum, hdr, rc, &writer);
+	nlmsg_flush(&writer);
+}
+
+
+struct netlink_walkargs { /* state shared across dump_rtentry() calls during a table walk */
+ struct nlmsg_state ns; /* writer the route messages are appended to */
+ struct rib_cmd_info rc; /* scratch command; rc_rt/rc_nh_new are set per route */
+ struct nlmsghdr hdr; /* header fields (pid/seq/flags) reused for every reply */
+ struct nlpcb *nlp; /* requesting socket */
+ uint32_t fibnum; /* table currently being dumped */
+ int family; /* address family currently being dumped */
+ int error; /* result of the last dump_rc(); non-zero stops further dumping */
+ int count; /* routes visited in the current table */
+ int dumped; /* routes for which a dump was attempted */
+};
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG3
+/*
+ * Debug helper: logs the first bytes of data as space-separated hex.
+ * Output is truncated to what fits in the local buffer (42 bytes of
+ * input) instead of writing past its end.
+ */
+static void
+print_hex(char *data, int len)
+{
+	char buffer[128];
+	size_t off = 0;
+
+	buffer[0] = '\0';
+	/* each byte needs three characters plus the terminating NUL */
+	for (int i = 0; i < len && off + 3 < sizeof(buffer); i++) {
+		off += snprintf(buffer + off, sizeof(buffer) - off, "%02X ",
+		    (unsigned char)data[i]);
+	}
+	RT_LOG(LOG_DEBUG2, "DBG: %s", buffer);
+}
+#endif
+
+/*
+ * rib_walk() callback: dumps one route into the walk's writer.
+ * Every visited route is counted; once a dump has failed the remaining
+ * routes are skipped.  Always returns 0, the failure is kept in the
+ * walk arguments.
+ */
+static int
+dump_rtentry(struct rtentry *rt, void *_arg)
+{
+	struct netlink_walkargs *args = (struct netlink_walkargs *)_arg;
+
+	args->count++;
+	if (args->error == 0) {
+		int rc_err;
+
+		args->dumped++;
+
+		args->rc.rc_rt = rt;
+		args->rc.rc_nh_new = rt_get_raw_nhop(rt);
+
+		rc_err = dump_rc(args->fibnum, &args->hdr, &args->rc, &args->ns);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
+		char rtbuf[INET6_ADDRSTRLEN + 5];
+		FIB_LOG(LOG_DEBUG2, args->fibnum, args->family, "Dump %s, offset %u, error %d",
+		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)), args->ns.offset, rc_err);
+#endif
+		args->error = rc_err;
+	}
+
+	return (0);
+}
+
+/*
+ * Walks a single routing table (fibnum/family) and dumps each route via
+ * dump_rtentry().  The per-table counters in wa are reset first.
+ */
+static void
+dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family)
+{
+	FIB_LOG(LOG_DEBUG, fibnum, family, "Start dump");
+
+	wa->dumped = 0;
+	wa->count = 0;
+	rib_walk(fibnum, family, false, dump_rtentry, wa);
+
+	FIB_LOG(LOG_DEBUG, fibnum, family, "End dump, iterated %d dumped %d",
+	    wa->count, wa->dumped);
+	RT_LOG(LOG_DEBUG2, "Current offset: %d", wa->ns.offset);
+}
+
+/*
+ * Records fibnum/family in wa and dumps that table, provided a routing
+ * table exists for the combination.
+ */
+static void
+dump_rtable_family(struct netlink_walkargs *wa, uint32_t fibnum, int family)
+{
+	wa->family = family;
+	wa->fibnum = fibnum;
+
+	if (rt_tables_get_rnh(fibnum, family) == NULL)
+		return;
+
+	dump_rtable_one(wa, fibnum, family);
+}
+
+/*
+ * Handles a non-dump RTM_GETROUTE: looks up a single route and sends it
+ * back to the requesting socket.
+ *
+ * Without a netmask the longest matching route for RTAX_DST is used,
+ * with a netmask an exact prefix lookup is done.
+ *
+ * Returns 0 on success, EINVAL when no destination was supplied,
+ * EAFNOSUPPORT when no table exists for the family, ESRCH when nothing
+ * matched, ENOMEM when no writer could be obtained, or the error
+ * reported by dump_rc().
+ */
+static int
+handle_rtm_getroute(struct nlpcb *nlp, struct rt_addrinfo *info, uint32_t fibnum,
+    struct nlmsghdr *hdr)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rnh;
+	struct nhop_object *nh;
+	sa_family_t saf;
+	int error;
+
+	if (info->rti_info[RTAX_DST] == NULL) {
+		RT_LOG(LOG_WARNING, "No RTAX_DST supplied");
+		return (EINVAL);
+	}
+	saf = info->rti_info[RTAX_DST]->sa_family;
+
+	FIB_LOG(LOG_DEBUG, fibnum, saf, "getroute called");
+
+	rnh = rt_tables_get_rnh(fibnum, saf);
+	if (rnh == NULL)
+		return (EAFNOSUPPORT);
+
+	struct rib_cmd_info rc = {};
+
+	RIB_RLOCK(rnh);
+
+	if ((info->rti_addrs & RTA_NETMASK) == 0) {
+		rc.rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
+		    info->rti_info[RTAX_DST], &rnh->head);
+	} else
+		rc.rc_rt = (struct rtentry *)rnh->rnh_lookup(
+		    info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK],
+		    &rnh->head);
+
+	if (rc.rc_rt == NULL) {
+		RIB_RUNLOCK(rnh);
+		return (ESRCH);
+	}
+
+	nh = rt_get_raw_nhop(rc.rc_rt);
+	if (nh == NULL) {
+		RIB_RUNLOCK(rnh);
+		return (ESRCH);
+	}
+	rc.rc_nh_new = nh;
+	rc.rc_nh_weight = rc.rc_rt->rt_weight;
+	rc.rc_cmd = RTM_GET;
+	RIB_RUNLOCK(rnh);
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char rtbuf[INET6_ADDRSTRLEN + 5], nhbuf[INET6_ADDRSTRLEN + 5];
+	FIB_LOG(LOG_DEBUG, fibnum, saf, "getroute completed: got %s for %s",
+	    nhop_print_buf(rc.rc_nh_new, nhbuf, sizeof(nhbuf)),
+	    rt_print_buf(rc.rc_rt, rtbuf, sizeof(rtbuf)));
+#endif
+	struct nlmsg_state ns = {};
+	if (!nlmsg_get_socket_writer(NLMSG_SMALL, nlp, &ns))
+		return (ENOMEM);
+	error = dump_rc(fibnum, hdr, &rc, &ns);
+	/* Hand the writer back, as the other dump_rc() callers do */
+	nlmsg_flush(&ns);
+
+	return (error);
+}
+
+
+/*
+ * Handles an RTM_GETROUTE dump request: walks the routing table(s) of
+ * fibnum and sends every route to the requesting socket as a multipart
+ * reply terminated by NLMSG_DONE.
+ *
+ * family selects a single address family; AF_UNSPEC dumps all of them.
+ * Returns 0 on success, ENOENT for an invalid fibnum, ENOMEM when the
+ * writer cannot be obtained or the terminator cannot be appended, or
+ * the first error hit while dumping.  The writer is flushed on every
+ * path once it has been obtained.
+ */
+static int
+handle_rtm_filter(struct nlpcb *nlp, struct rt_addrinfo *info, uint32_t fibnum, int family,
+    struct nlmsghdr *hdr)
+{
+	struct netlink_walkargs wa = {
+		.nlp = nlp,
+		.rc.rc_cmd = RTM_ADD,
+		.hdr.nlmsg_pid = hdr->nlmsg_pid,
+		.hdr.nlmsg_seq = hdr->nlmsg_seq,
+		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
+		.fibnum = fibnum,
+		.family = family,
+	};
+
+	if (fibnum >= V_rt_numfibs) {
+		FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum");
+		return (ENOENT);
+	}
+
+	if (!nlmsg_get_socket_writer(NLMSG_LARGE, nlp, &wa.ns)) {
+		RT_LOG(LOG_DEBUG, "error allocating mbuf");
+		return (ENOMEM);
+	}
+
+	if (family != AF_UNSPEC)
+		dump_rtable_family(&wa, fibnum, family);
+	else {
+		for (int i = 1; i < AF_MAX; i++) {
+			dump_rtable_family(&wa, fibnum, i);
+			if (wa.error != 0)
+				break;
+		}
+	}
+
+	if (wa.error == 0) {
+		if (nlmsg_put(&wa.ns, wa.hdr.nlmsg_pid, wa.hdr.nlmsg_seq,
+		    NLMSG_DONE, 0, sizeof(int))) {
+			/* Save operation result */
+			int *perror = nlmsg_reserve_object(&wa.ns, int);
+			RT_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", wa.error,
+			    wa.ns.offset, perror);
+			*perror = wa.error;
+			nlmsg_end(&wa.ns);
+		} else {
+			RT_LOG(LOG_DEBUG, "unable to append NLMSG_DONE");
+			wa.error = ENOMEM;
+		}
+	}
+	/* Always hand the writer back, including on failure */
+	nlmsg_flush(&wa.ns);
+
+	return (wa.error);
+}
+
+
+/*
+ * Handles NL_RTM_NEWROUTE: parses the request (destination and gateway
+ * are mandatory), adds the route and reports the change on success.
+ *
+ * Returns 0, the parse error recorded in npt->error (EINVAL if none was
+ * recorded), or the error from rib_action().
+ */
+static int
+rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp,
+    struct netlink_parse_tracker *npt)
+{
+	struct rib_cmd_info rc;
+	struct rt_addrinfo *info;
+	int error;
+
+	info = get_info_from_rtmsg(hdr, RTA_DST | RTA_GATEWAY, npt);
+	if (info == NULL)
+		return (npt->error != 0 ? npt->error : EINVAL);
+
+	error = rib_action(info->rti_fibnum, RTM_ADD, info, &rc);
+	if (error == 0)
+		report_operation(info->rti_fibnum, info, &rc, nlp, hdr);
+	return (error);
+}
+
+/*
+ * Handles NL_RTM_DELROUTE: parses the request (destination is
+ * mandatory), deletes the route and reports the change on success.
+ *
+ * Returns 0, the parse error recorded in npt->error (EINVAL if none was
+ * recorded), or the error from rib_action().
+ */
+static int
+rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+	struct rib_cmd_info rc;
+	struct rt_addrinfo *info;
+	int error;
+
+	info = get_info_from_rtmsg(hdr, RTA_DST, npt);
+	if (info == NULL)
+		return (npt->error != 0 ? npt->error : EINVAL);
+
+	/* XX */
+
+	error = rib_action(info->rti_fibnum, RTM_DELETE, info, &rc);
+	if (error == 0)
+		report_operation(info->rti_fibnum, info, &rc, nlp, hdr);
+	return (error);
+}
+
+/*
+ * Handles NL_RTM_GETROUTE.  With NLM_F_DUMP set the whole table is
+ * dumped and no attribute is mandatory; otherwise a single route is
+ * looked up and a destination is required.
+ *
+ * Returns the handler's result, or the parse error recorded in
+ * npt->error (EINVAL if none was recorded) when the request is invalid.
+ */
+static int
+rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+	const bool is_dump = (hdr->nlmsg_flags & NLM_F_DUMP) != 0;
+	struct rt_addrinfo *info;
+
+	info = get_info_from_rtmsg(hdr, is_dump ? 0 : RTA_DST, npt);
+	if (info == NULL)
+		return (npt->error != 0 ? npt->error : EINVAL);
+
+	if (is_dump)
+		return (handle_rtm_filter(nlp, info, info->rti_fibnum,
+		    info->rti_family, hdr));
+	return (handle_rtm_getroute(nlp, info, info->rti_fibnum, hdr));
+}
+
+/*
+ * Dispatches one rtnetlink message to the handler for its type.
+ * Returns the handler's result, or EOPNOTSUPP for unknown types.
+ */
+static int
+rtnl_handle_message(struct nlmsghdr *hdr, struct nlpcb *nlp, struct netlink_parse_tracker *npt)
+{
+	int error = 0;
+
+	RT_LOG(LOG_DEBUG2, "received msg type %d (pid %u)", hdr->nlmsg_type,
+	    hdr->nlmsg_pid);
+	/* XXX: check min header length */
+	switch (hdr->nlmsg_type) {
+	case NL_RTM_NEWROUTE:
+		error = rtnl_handle_newroute(hdr, nlp, npt);
+		break;
+	case NL_RTM_DELROUTE:
+		error = rtnl_handle_delroute(hdr, nlp, npt);
+		break;
+	case NL_RTM_GETROUTE:
+		error = rtnl_handle_getroute(hdr, nlp, npt);
+		break;
+	case NL_RTM_GETLINK:
+		error = rtnl_handle_getlink(hdr, nlp, npt);
+		break;
+	case NL_RTM_GETADDR:
+		error = rtnl_handle_getaddr(hdr, nlp, npt);
+		break;
+	case NL_RTM_NEWNEXTHOP:
+		error = rtnl_handle_newnhop(hdr, nlp, npt);
+		/* without this break the result was replaced by EOPNOTSUPP */
+		break;
+	default:
+		RT_LOG(LOG_DEBUG, "msg type %d unsupported (pid %u)",
+		    hdr->nlmsg_type, hdr->nlmsg_pid);
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Entry point registered for NETLINK_ROUTE: runs the message dispatcher
+ * inside the network epoch and returns its result.
+ */
+static int
+rtnl_receive_message(struct nlmsghdr *hdr, struct netlink_parse_tracker *npt)
+{
+	struct epoch_tracker et;
+	int ret;
+
+	NET_EPOCH_ENTER(et);
+	ret = rtnl_handle_message(hdr, npt->nlp, npt);
+	NET_EPOCH_EXIT(et);
+
+	return (ret);
+}
+
+/*
+ * Turns a routing table change into a netlink notification for the
+ * group matching the route's family.  The netlink flags reflect the
+ * kind of change: create+excl for additions, replace for changes, none
+ * otherwise.
+ */
+static void
+handle_route_event(uint32_t fibnum, const struct rt_addrinfo *info,
+    const struct rib_cmd_info *rc)
+{
+	struct nlmsg_state ns;
+	const int family = rt_get_family(rc->rc_rt);
+	int nlm_flags;
+
+	/* XXX: check if there are active listeners first */
+
+	/* TODO: consider passing PID/type/seq */
+	if (rc->rc_cmd == RTM_ADD)
+		nlm_flags = NLM_F_EXCL | NLM_F_CREATE;
+	else if (rc->rc_cmd == RTM_CHANGE)
+		nlm_flags = NLM_F_REPLACE;
+	else
+		nlm_flags = 0;
+
+#if DEBUG_MAX_LEVEL >= LOG_DEBUG
+	char rtbuf[INET6_ADDRSTRLEN + 5];
+	FIB_LOG(LOG_DEBUG, fibnum, family, "received event %s for %s / nlm_flags=%X",
+	    rib_print_cmd(rc->rc_cmd), rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)),
+	    nlm_flags);
+#endif
+	struct nlmsghdr hdr = {
+		.nlmsg_flags = nlm_flags,
+	};
+	const uint32_t group = family_to_group(family);
+
+	if (nlmsg_get_group_writer(NLMSG_SMALL, group, &ns)) {
+		dump_rc(fibnum, &hdr, rc, &ns);
+		nlmsg_flush(&ns);
+		return;
+	}
+
+	RT_LOG(LOG_DEBUG, "error allocating mbuf");
+}
+
+/*
+ * Callback installed through nlbridge: forwards route events to
+ * handle_route_event() and ignores every other event type.
+ */
+static void nlbridge_cb_func(uint32_t event_type, uint32_t fibnum,
+    const struct rt_addrinfo *info, const struct rib_cmd_info *rc, void *arg)
+{
+	RT_LOG(LOG_DEBUG2, "received bridge event %d", event_type);
+
+	if (event_type == NLBR_EVENT_ROUTE)
+		handle_route_event(fibnum, info, rc);
+}
+
+static struct rib_event_bridge nlbridge = { /* linked in rtnl_load(), unlinked in rtnl_unload() */
+ .reb_cb = nlbridge_cb_func, /* callback that receives the events */
+ .reb_cb_arg = NULL, /* no callback argument is used */
+ .reb_provider_id = NLBR_PROVIDER_NETLINK,
+};
+
+static void
+rtnl_load(void *u __unused) /* SYSINIT hook, see the SYSINIT() line below */
+{
+ RT_LOG(LOG_ERR, "netlink support is in ALPHA stage");
+ RT_LOG(LOG_NOTICE, "rtnl loading");
+ rib_bridge_link(&nlbridge); /* presumably starts delivery of rib events to nlbridge_cb_func -- confirm */
+ netlink_register_proto(NETLINK_ROUTE, rtnl_receive_message); /* route NETLINK_ROUTE messages to this file */
+}
+SYSINIT(rtnl_load, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_load, NULL);
+
+static void
+rtnl_unload(void *u __unused) /* SYSUNINIT hook, see the SYSUNINIT() line below */
+{
+ rib_bridge_unlink(&nlbridge);
+
+ /*
+ * Wait till all consumers read nlbridge data, i.e. until callers
+ * that may still be inside the network epoch have left it.
+ * NOTE(review): the NETLINK_ROUTE handler registered in rtnl_load()
+ * is not unregistered here -- confirm this is handled elsewhere.
+ */
+ epoch_wait_preempt(net_epoch_preempt);
+}
+SYSUNINIT(rtnl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_unload, NULL);
Index: sys/netlink/netlink_var.h
===================================================================
--- /dev/null
+++ sys/netlink/netlink_var.h
@@ -0,0 +1,205 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 Ng Peng Nam Sean
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _NETLINK_NETLINK_VAR_H_
+#define _NETLINK_NETLINK_VAR_H_
+
+#include <sys/epoch.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <net/vnet.h>
+
+MALLOC_DECLARE(M_NETLINK);
+
+#define NLSNDQ 65536 /* Default socket sendspace */
+#define NLRCVQ 65536 /* Default socket recvspace */
+
+struct nlpcb {
+ struct socket *nl_socket;
+ uint32_t nl_port;
+ uint32_t nl_groups;
+ uint32_t nl_flags;
+ uint32_t nl_process_id;
+ int nl_proto;
+ bool nl_active;
+ bool nl_task_pending;
+ bool nl_linux; /* true if running under compat */
+ struct mbuf *nl_queue_head;
+ struct mbuf *nl_queue_last;
+ int64_t nl_queue_length;
+ struct taskqueue *nl_taskqueue;
+ struct task nl_task;
+ CK_LIST_ENTRY(nlpcb) nl_next;
+ CK_LIST_ENTRY(nlpcb) nl_port_next;
+ volatile u_int nl_refcount;
+ struct mtx nl_lock;
+ struct epoch_context nl_epoch_ctx;
+};
+#define sotonlpcb(so) ((struct nlpcb *)(so)->so_pcb)
+
+#define NLP_LOCK_INIT(_nlp) mtx_init(&((_nlp)->nl_lock), "nlp mtx", NULL, MTX_DEF)
+#define NLP_LOCK_DESTROY(_nlp) mtx_destroy(&((_nlp)->nl_lock))
+#define NLP_LOCK(_nlp) mtx_lock(&((_nlp)->nl_lock))
+#define NLP_UNLOCK(_nlp) mtx_unlock(&((_nlp)->nl_lock))
+
+#define ALIGNED_NL_SZ(_data) roundup2((((struct nlmsghdr *)(_data))->nlmsg_len), 16)
+
+#define NLF_CAP_ACK 0x01 /* Do not send message body with errmsg */
+#define NLF_EXT_ACK 0x02 /* Allow including extended TLVs in ack */
+
+#define NETISR_NETLINK 15 // XXX hack, must be unused and < 16
+
+
+SYSCTL_DECL(_net_netlink);
+
+struct nl_io {
+ struct callout callout;
+ struct mbuf *head;
+ struct mbuf *last;
+ int64_t length;
+};
+
+
+struct nl_control {
+ CK_LIST_HEAD(nl_pid_head, nlpcb) ctl_port_head;
+ CK_LIST_HEAD(nlpcb_head, nlpcb) ctl_pcb_head;
+ CK_LIST_ENTRY(nl_control) ctl_next;
+ struct nl_io ctl_io;
+ struct rmlock ctl_lock;
+};
+VNET_DECLARE(struct nl_control *, nl_ctl);
+#define V_nl_ctl VNET(nl_ctl)
+
+
+/* locking */
+#define CTL_TRACKER struct rm_priotracker nl_tracker
+#define CTL_RLOCK() rm_rlock(&V_nl_ctl->ctl_lock, &nl_tracker)
+#define CTL_RUNLOCK() rm_runlock(&V_nl_ctl->ctl_lock, &nl_tracker)
+
+#define CTL_WLOCK() rm_wlock(&V_nl_ctl->ctl_lock)
+#define CTL_WUNLOCK() rm_wunlock(&V_nl_ctl->ctl_lock)
+
+struct sockaddr_nl;
+struct sockaddr;
+struct nlmsghdr;
+
+/* Parsing state */
+
+struct linear_buffer {
+ char *base; /* Base allocated memory pointer */
+ uint32_t offset; /* Currently used offset */
+ uint32_t size; /* Total buffer size */
+};
+
+/*
+ * Carves len bytes, rounded up to a multiple of sizeof(uint64_t), out of
+ * the linear buffer and advances the used offset past them.
+ *
+ * Returns a pointer to the start of the new region, or NULL when len is
+ * negative or the rounded request does not fit into the remaining space.
+ * The memory is not cleared here.
+ */
+static inline void *
+lb_alloc(struct linear_buffer *lb, int len)
+{
+	uint32_t need;
+	void *data;
+
+	/*
+	 * A negative len would be converted to a large unsigned value in
+	 * the bounds check, wrap around and move the offset backwards,
+	 * handing out memory that is already in use.
+	 */
+	if (len < 0)
+		return (NULL);
+	need = roundup2((uint32_t)len, sizeof(uint64_t));
+	/* Compare against the free space so the check itself cannot wrap */
+	if (lb->offset > lb->size || need > lb->size - lb->offset)
+		return (NULL);
+	data = (void *)(lb->base + lb->offset);
+	lb->offset += need;
+	return (data);
+}
+
+/*
+ * Resets the linear buffer: rewinds the used offset to the beginning and
+ * zero-fills the whole backing storage (lb->size bytes from lb->base).
+ */
+static inline void
+lb_clear(struct linear_buffer *lb)
+{
+	lb->offset = 0;
+	memset(lb->base, 0, lb->size);
+}
+
+#define SCRATCH_BUFFER_SIZE 1024
+struct netlink_parse_tracker {
+ struct linear_buffer lb; /* Per-message scratch buffer */
+ struct nlpcb *nlp; /* Originator */
+ struct nlmsghdr *hdr; /* Current message being parsed */
+ int error; /* last operation error */
+};
+
+/*
+ * Allocates len bytes from the parse tracker's scratch buffer (npt->lb)
+ * by delegating to lb_alloc().  Returns the pointer produced by
+ * lb_alloc(), which is NULL when the request does not fit.
+ */
+static inline void *
+npt_alloc(struct netlink_parse_tracker *npt, int len)
+{
+ return (lb_alloc(&npt->lb, len));
+}
+#define npt_alloc_sockaddr(_npt, _len) ((struct sockaddr *)(npt_alloc(_npt, _len)))
+
+/* netlink_netisr.c */
+void netlink_netisr_init(void);
+void netlink_netisr_destroy(void);
+void netlink_netisr_vnet_init(void);
+void netlink_netisr_vnet_destroy(void);
+int nl_send_msg(struct mbuf *m, uint32_t group_mask);
+void nl_msg_from_netlink(struct mbuf *m);
+
+extern struct netisr_handler nlsock_nh;
+
+/* netlink_io.c */
+void nl_taskqueue_handler(void *_arg, int pending);
+int nl_receive_async(struct mbuf *m, struct socket *so);
+void nl_process_receive_locked(struct nlpcb *nlp);
+
+/* netlink_iface.c */
+struct rt_addrinfo;
+int rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp,
+ struct netlink_parse_tracker *npt);
+int rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp,
+ struct netlink_parse_tracker *npt);
+
+/* netlink_module.c */
+void vnet_nl_ctl_init(void);
+
+int nl_verify_proto(int proto);
+
+extern int netlink_unloading;
+
+#define NL_MAX_HANDLERS 100
+typedef int (*nl_handler)(struct nlmsghdr *hdr, struct netlink_parse_tracker *npt);
+extern nl_handler nl_handlers[NL_MAX_HANDLERS];
+
+/* netlink_nhop.c */
+int rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp,
+ struct netlink_parse_tracker *npt);
+
+/* XXX PUBLIC headers */
+struct nlpcb;
+void nl_send_group(struct mbuf *m, uint32_t group_mask);
+bool nl_send_one(struct mbuf *m, struct nlpcb *nlp);
+void nlmsg_ack(struct nlpcb *rp, int error, struct nlmsghdr *nlmsg);
+
+
+/* Still public headers */
+bool netlink_register_proto(int proto, nl_handler handle);
+bool netlink_unregister_proto(int proto);
+
+/* Linux compat */
+struct nlmsghdr *nlmsg_from_linux(struct nlmsghdr *hdr,
+ struct netlink_parse_tracker *npt);
+struct mbuf *nlmsgs_to_linux(char *buf, int buflen, struct nlpcb *nlp,
+ struct linear_buffer *lb);
+
+#endif
Index: sys/netlink/route/base.h
===================================================================
--- /dev/null
+++ sys/netlink/route/base.h
@@ -0,0 +1,262 @@
+
+
+#ifndef _NETLINK_ROUTE_ROUTE_BASE_H_
+#define _NETLINK_ROUTE_ROUTE_BASE_H_
+
+
+/*
+ * Messages defined by the NETLINK_ROUTE subsystem
+ */
+
+enum {
+ NL_RTM_BASE = 16,
+#define NL_RTM_BASE NL_RTM_BASE
+ NL_RTM_NEWLINK = 16,
+#define NL_RTM_NEWLINK NL_RTM_NEWLINK
+ NL_RTM_DELLINK,
+#define NL_RTM_DELLINK NL_RTM_DELLINK
+ NL_RTM_GETLINK,
+#define NL_RTM_GETLINK NL_RTM_GETLINK
+ NL_RTM_SETLINK,
+#define NL_RTM_SETLINK NL_RTM_SETLINK
+ NL_RTM_NEWADDR = 20,
+#define NL_RTM_NEWADDR NL_RTM_NEWADDR
+ NL_RTM_DELADDR,
+#define NL_RTM_DELADDR NL_RTM_DELADDR
+ NL_RTM_GETADDR,
+#define NL_RTM_GETADDR NL_RTM_GETADDR
+ NL_RTM_NEWROUTE = 24,
+#define NL_RTM_NEWROUTE NL_RTM_NEWROUTE
+ NL_RTM_DELROUTE,
+#define NL_RTM_DELROUTE NL_RTM_DELROUTE
+ NL_RTM_GETROUTE,
+#define NL_RTM_GETROUTE NL_RTM_GETROUTE
+ NL_RTM_NEWNEIGH = 28,
+#define NL_RTM_NEWNEIGH NL_RTM_NEWNEIGH
+ NL_RTM_DELNEIGH,
+#define NL_RTM_DELNEIGH NL_RTM_DELNEIGH
+ NL_RTM_GETNEIGH,
+#define NL_RTM_GETNEIGH NL_RTM_GETNEIGH
+ NL_RTM_NEWRULE = 32,
+#define NL_RTM_NEWRULE NL_RTM_NEWRULE
+ NL_RTM_DELRULE,
+#define NL_RTM_DELRULE NL_RTM_DELRULE
+ NL_RTM_GETRULE,
+#define NL_RTM_GETRULE NL_RTM_GETRULE
+ NL_RTM_NEWQDISC = 36,
+#define NL_RTM_NEWQDISC NL_RTM_NEWQDISC
+ NL_RTM_DELQDISC,
+#define NL_RTM_DELQDISC NL_RTM_DELQDISC
+ NL_RTM_GETQDISC,
+#define NL_RTM_GETQDISC NL_RTM_GETQDISC
+ NL_RTM_NEWTCLASS = 40,
+#define NL_RTM_NEWTCLASS NL_RTM_NEWTCLASS
+ NL_RTM_DELTCLASS,
+#define NL_RTM_DELTCLASS NL_RTM_DELTCLASS
+ NL_RTM_GETTCLASS,
+#define NL_RTM_GETTCLASS NL_RTM_GETTCLASS
+ NL_RTM_NEWTFILTER = 44,
+#define NL_RTM_NEWTFILTER NL_RTM_NEWTFILTER
+ NL_RTM_DELTFILTER,
+#define NL_RTM_DELTFILTER NL_RTM_DELTFILTER
+ NL_RTM_GETTFILTER,
+#define NL_RTM_GETTFILTER NL_RTM_GETTFILTER
+ NL_RTM_NEWACTION = 48,
+#define NL_RTM_NEWACTION NL_RTM_NEWACTION
+ NL_RTM_DELACTION,
+#define NL_RTM_DELACTION NL_RTM_DELACTION
+ NL_RTM_GETACTION,
+#define NL_RTM_GETACTION NL_RTM_GETACTION
+ NL_RTM_NEWPREFIX = 52,
+#define NL_RTM_NEWPREFIX NL_RTM_NEWPREFIX
+ NL_RTM_GETMULTICAST = 58,
+#define NL_RTM_GETMULTICAST NL_RTM_GETMULTICAST
+ NL_RTM_GETANYCAST = 62,
+#define NL_RTM_GETANYCAST NL_RTM_GETANYCAST
+ NL_RTM_NEWNEIGHTBL = 64,
+#define NL_RTM_NEWNEIGHTBL NL_RTM_NEWNEIGHTBL
+ NL_RTM_GETNEIGHTBL = 66,
+#define NL_RTM_GETNEIGHTBL NL_RTM_GETNEIGHTBL
+ NL_RTM_SETNEIGHTBL,
+#define NL_RTM_SETNEIGHTBL NL_RTM_SETNEIGHTBL
+ NL_RTM_NEWNDUSEROPT = 68,
+#define NL_RTM_NEWNDUSEROPT NL_RTM_NEWNDUSEROPT
+ NL_RTM_NEWADDRLABEL = 72,
+#define NL_RTM_NEWADDRLABEL NL_RTM_NEWADDRLABEL
+ NL_RTM_DELADDRLABEL,
+#define NL_RTM_DELADDRLABEL NL_RTM_DELADDRLABEL
+ NL_RTM_GETADDRLABEL,
+#define NL_RTM_GETADDRLABEL NL_RTM_GETADDRLABEL
+ NL_RTM_GETDCB = 78,
+#define NL_RTM_GETDCB NL_RTM_GETDCB
+ NL_RTM_SETDCB,
+#define NL_RTM_SETDCB NL_RTM_SETDCB
+ NL_RTM_NEWNETCONF = 80,
+#define NL_RTM_NEWNETCONF NL_RTM_NEWNETCONF
+ NL_RTM_GETNETCONF = 82,
+#define NL_RTM_GETNETCONF NL_RTM_GETNETCONF
+ NL_RTM_NEWMDB = 84,
+#define NL_RTM_NEWMDB NL_RTM_NEWMDB
+ NL_RTM_DELMDB = 85,
+#define NL_RTM_DELMDB NL_RTM_DELMDB
+ NL_RTM_GETMDB = 86,
+#define NL_RTM_GETMDB NL_RTM_GETMDB
+ NL_RTM_NEWNSID = 88,
+#define NL_RTM_NEWNSID NL_RTM_NEWNSID
+ NL_RTM_DELNSID = 89,
+#define NL_RTM_DELNSID NL_RTM_DELNSID
+ NL_RTM_GETNSID = 90,
+#define NL_RTM_GETNSID NL_RTM_GETNSID
+ NL_RTM_NEWSTATS = 92,
+#define NL_RTM_NEWSTATS NL_RTM_NEWSTATS
+ NL_RTM_GETSTATS = 94,
+#define NL_RTM_GETSTATS NL_RTM_GETSTATS
+ NL_RTM_NEWNEXTHOP = 104,
+#define NL_RTM_NEWNEXTHOP NL_RTM_NEWNEXTHOP
+ NL_RTM_DELNEXTHOP,
+#define NL_RTM_DELNEXTHOP NL_RTM_DELNEXTHOP
+ NL_RTM_GETNEXTHOP,
+#define NL_RTM_GETNEXTHOP NL_RTM_GETNEXTHOP
+ __NL_RTM_MAX,
+#define NL_RTM_MAX (((__NL_RTM_MAX + 3) & ~3) - 1)
+};
+
+#ifndef _KERNEL
+/*
+ * RTM_* namespace clashes with BSD rtsock namespace.
+ * Use NL_RTM_ prefix in the kernel and map it to RTM_
+ * for userland.
+ */
+#define RTM_BASE NL_RTM_BASE
+#define RTM_NEWLINK NL_RTM_NEWLINK
+#define RTM_DELLINK NL_RTM_DELLINK
+#define RTM_GETLINK NL_RTM_GETLINK
+#define RTM_SETLINK NL_RTM_SETLINK
+#define RTM_NEWADDR NL_RTM_NEWADDR
+#define RTM_DELADDR NL_RTM_DELADDR
+#define RTM_GETADDR NL_RTM_GETADDR
+#define RTM_NEWROUTE NL_RTM_NEWROUTE
+#define RTM_DELROUTE NL_RTM_DELROUTE
+#define RTM_GETROUTE NL_RTM_GETROUTE
+#endif
+
+
+/*
+ * route attribute header
+ */
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+#ifndef _roundup2
+#define _roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
+#endif
+#define NL_RTA_ALIGN_SIZE sizeof(uint32_t)
+#define NL_RTA_ALIGN(_len) _roundup2(_len, NL_RTA_ALIGN_SIZE)
+#define _NL_RTA_BASE_LEN NL_RTA_ALIGN(sizeof(struct rtattr))
+#define NL_RTA_DATA_LEN(_rta) ((int)((_rta)->rta_len - _NL_RTA_BASE_LEN))
+#define NL_RTA_DATA(_rta) ((void *)((char *)(_rta) + _NL_RTA_BASE_LEN))
+#define NL_RTA_DATA_CONST(_rta) ((const void *)((const char *)(_rta) + _NL_RTA_BASE_LEN))
+
+
+/* Compatibility attribute handling helpers */
+#ifndef _KERNEL
+#define RTA_ALIGNTO NL_RTA_ALIGN_SIZE
+#define RTA_ALIGN(_len) NL_RTA_ALIGN(_len)
+#define _RTA_ALIGNED_LEN(_rta) RTA_ALIGN((_rta)->rta_len)
+#define _RTA_OK(_rta, _len, _sz) \
+ (((_len) >= (_sz)) && ((_rta)->rta_len >= (_sz)) && ((_rta)->rta_len <= (_len)))
+#define RTA_OK(_rta, _len) _RTA_OK(_rta, _len, sizeof(struct rtattr))
+/*
+ * Advances _rta to the next attribute and subtracts the aligned length of
+ * the current attribute from _len.  Both arguments are modified in place
+ * and _rta is evaluated more than once, so pass plain lvalues.
+ */
+#define RTA_NEXT(_rta, _len) \
+	((_len) -= _RTA_ALIGNED_LEN(_rta), \
+	(_rta) = (struct rtattr *)((char *)(_rta) + _RTA_ALIGNED_LEN(_rta)))
+
+#define RTA_LENGTH(_len) (sizeof(struct rtattr) + (_len))
+#define RTA_SPACE(_len) RTA_ALIGN(RTA_LENGTH(_len))
+#define RTA_DATA(_rta) NL_RTA_DATA(_rta)
+#define RTA_PAYLOAD(_rta) ((int)((_rta)->rta_len) - sizeof(struct rtattr))
+
+#define RTM_RTA(_rtm) \
+ ((struct rtattr *)(((char *)(_rtm)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(_msg) NLMSG_PAYLOAD((_msg), sizeof(struct rtmsg))
+#endif
+
+
+enum rtattr_type_t {
+ NL_RTA_UNSPEC,
+ NL_RTA_DST,
+ NL_RTA_SRC,
+ NL_RTA_IIF,
+ NL_RTA_OIF,
+ NL_RTA_GATEWAY,
+ NL_RTA_PRIORITY,
+ NL_RTA_PREFSRC,
+ NL_RTA_METRICS,
+ NL_RTA_MULTIPATH,
+ NL_RTA_PROTOINFO, /* not used / deprecated */
+ NL_RTA_FLOW,
+ NL_RTA_CACHEINFO, /* not used */
+ NL_RTA_SESSION, /* not used / deprecated */
+ NL_RTA_MP_ALGO, /* not used / deprecated */
+ NL_RTA_TABLE,
+ NL_RTA_MARK, /* not used */
+ NL_RTA_MFC_STATS,
+ NL_RTA_VIA,
+ NL_RTA_NEWDST,
+ NL_RTA_PREF,
+ NL_RTA_ENCAP_TYPE,
+ NL_RTA_ENCAP,
+ NL_RTA_EXPIRES,
+ NL_RTA_PAD,
+ NL_RTA_UID,
+ NL_RTA_TTL_PROPAGATE,
+ NL_RTA_IP_PROTO,
+ NL_RTA_SPORT,
+ NL_RTA_DPORT,
+ NL_RTA_NH_ID,
+ __RTA_MAX
+};
+#define NL_RTA_MAX (__RTA_MAX - 1)
+
+#ifndef _KERNEL
+/*
+ * RTA_* space has clashes with rtsock namespace.
+ * Use NL_RTA_ prefix in the kernel and map to
+ * RTA_ for userland.
+ */
+#define RTA_UNSPEC NL_RTA_UNSPEC
+#define RTA_DST NL_RTA_DST
+#define RTA_SRC NL_RTA_SRC
+#define RTA_IIF NL_RTA_IIF
+#define RTA_OIF NL_RTA_OIF
+#define RTA_GATEWAY NL_RTA_GATEWAY
+#define RTA_PRIORITY NL_RTA_PRIORITY
+#define RTA_PREFSRC NL_RTA_PREFSRC
+#define RTA_METRICS NL_RTA_METRICS
+#define RTA_MULTIPATH NL_RTA_MULTIPATH
+#define RTA_PROTOINFO NL_RTA_PROTOINFO
+#define RTA_FLOW NL_RTA_FLOW
+#define RTA_CACHEINFO NL_RTA_CACHEINFO
+#define RTA_SESSION NL_RTA_SESSION
+#define RTA_MP_ALGO NL_RTA_MP_ALGO
+#define RTA_TABLE NL_RTA_TABLE
+#define RTA_MARK NL_RTA_MARK
+#define RTA_MFC_STATS NL_RTA_MFC_STATS
+#define RTA_VIA NL_RTA_VIA
+#define RTA_NEWDST NL_RTA_NEWDST
+#define RTA_PREF NL_RTA_PREF
+#define RTA_ENCAP_TYPE NL_RTA_ENCAP_TYPE
+#define RTA_ENCAP NL_RTA_ENCAP
+#define RTA_EXPIRES NL_RTA_EXPIRES
+#define RTA_PAD NL_RTA_PAD
+#define RTA_UID NL_RTA_UID
+#define RTA_TTL_PROPAGATE NL_RTA_TTL_PROPAGATE
+#define RTA_IP_PROTO NL_RTA_IP_PROTO
+#define RTA_SPORT NL_RTA_SPORT
+#define RTA_DPORT NL_RTA_DPORT
+#define RTA_NH_ID NL_RTA_NH_ID
+#define RTA_MAX NL_RTA_MAX
+#endif
+
+
+#endif
\ No newline at end of file
Index: sys/netlink/route/nhop.h
===================================================================
--- /dev/null
+++ sys/netlink/route/nhop.h
@@ -0,0 +1,27 @@
+#ifndef _NETLINK_ROUTE_NHOP_H_
+#define _NETLINK_ROUTE_NHOP_H_
+
+struct nhmsg {
+ unsigned char nh_family;
+ unsigned char nh_scope; /* return only */
+ unsigned char nh_protocol; /* Routing protocol that installed nh */
+ unsigned char resvd;
+ unsigned int nh_flags; /* RTNH_F flags */
+};
+
+/*
+ * Entry in a nexthop group.
+ * Fixed-width <stdint.h> types are used instead of the Linux-only
+ * __u32/__u8/__u16 spellings, which are not defined on FreeBSD; the
+ * field sizes are unchanged.
+ */
+struct nexthop_grp {
+	uint32_t	id;		/* nexthop userland index */
+	uint8_t		weight;		/* weight of this nexthop */
+	uint8_t		resvd1;
+	uint16_t	resvd2;
+};
+
+enum {
+ NEXTHOP_GRP_TYPE_MPATH, /* default nexthop group */
+ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */
+ __NEXTHOP_GRP_TYPE_MAX,
+};
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+#endif
\ No newline at end of file
Index: sys/netlink/route/route.h
===================================================================
--- /dev/null
+++ sys/netlink/route/route.h
@@ -0,0 +1,236 @@
+#ifndef _NETLINK_ROUTE_ROUTE_H_
+#define _NETLINK_ROUTE_ROUTE_H_
+
+/*
+ * Headers and attributes necessary for handling
+ * RTM_NEWROUTE|RTM_DELROUTE|RTM_GETROUTE
+ */
+
+/*
+ * Routing message header
+ */
+struct rtmsg {
+ unsigned char rtm_family; /* address family */
+ unsigned char rtm_dst_len; /* Prefix length */
+ unsigned char rtm_src_len; /* Source prefix length (not used) */
+ unsigned char rtm_tos; /* Type of service (not used) */
+ unsigned char rtm_table; /* rtable id */
+ unsigned char rtm_protocol; /* Routing protocol id (RTPROT_) */
+ unsigned char rtm_scope; /* Route distance (RT_SCOPE_) */
+ unsigned char rtm_type; /* Route type (RTN_) */
+ unsigned rtm_flags; /* Route flags (RTM_F_) */
+};
+
+/*
+ * RFC 3549, 3.1.1, route type (rtm_type field).
+ */
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Unicast route */
+ RTN_LOCAL, /* Accept locally (not supported) */
+ RTN_BROADCAST, /* Accept locally as broadcast, send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast, but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop traffic towards destination */
+ RTN_UNREACHABLE,/* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table (not supported) */
+ RTN_NAT, /* Translate this address (not supported) */
+ RTN_XRESOLVE, /* Use external resolver (not supported) */
+ __RTN_MAX,
+};
+#define RTN_MAX (__RTN_MAX - 1)
+
+/*
+ * RFC 3549, 3.1.1, protocol (Identifies what/who added the route).
+ * Values larger than RTPROT_STATIC(4) are not interpreted by the
+ * kernel, they are just for user information.
+ */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirect */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/*
+ * RFC 3549 3.1.1 Route scope (valid distance to destination).
+ *
+ * The values between RT_SCOPE_UNIVERSE(0) and RT_SCOPE_SITE(200)
+ * are available to the user.
+*/
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE = 0,
+ /* User defined values */
+ RT_SCOPE_SITE = 200,
+ RT_SCOPE_LINK = 253,
+ RT_SCOPE_HOST = 254,
+ RT_SCOPE_NOWHERE = 255
+};
+
+/*
+ * RFC 3549 3.1.1 Route flags.
+*/
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned (not used) */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set tableid to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed */
+
+/*
+ * Routing table identifiers.
+ * FreeBSD route table numbering starts from 0.
+ */
+#define RT_TABLE_MAIN 0 /* RT_DEFAULT_FIB */
+#define RT_TABLE_UNSPEC 0xFFFFFFFF /* RT_ALL_FIBS */
+
+
+/* NL_RTA_VIA header */
+struct rtvia {
+ sa_family_t rtvia_family;
+ uint8_t rtvia_addr[0];
+};
+
+/*
+* NL_RTA_METRICS: attribute consisting of
+* array of struct rtattr with types of RTAX_*
+*/
+
+ enum {
+ NL_RTAX_UNSPEC,
+#define NL_RTAX_UNSPEC NL_RTAX_UNSPEC
+ NL_RTAX_LOCK,
+#define NL_RTAX_LOCK NL_RTAX_LOCK
+ NL_RTAX_MTU,
+#define NL_RTAX_MTU NL_RTAX_MTU
+ NL_RTAX_WINDOW,
+#define NL_RTAX_WINDOW NL_RTAX_WINDOW
+ NL_RTAX_RTT,
+#define NL_RTAX_RTT NL_RTAX_RTT
+ NL_RTAX_RTTVAR,
+#define NL_RTAX_RTTVAR NL_RTAX_RTTVAR
+ NL_RTAX_SSTHRESH,
+#define NL_RTAX_SSTHRESH NL_RTAX_SSTHRESH
+ NL_RTAX_CWND,
+#define NL_RTAX_CWND NL_RTAX_CWND
+ NL_RTAX_ADVMSS,
+#define NL_RTAX_ADVMSS NL_RTAX_ADVMSS
+ NL_RTAX_REORDERING,
+#define NL_RTAX_REORDERING NL_RTAX_REORDERING
+ NL_RTAX_HOPLIMIT,
+#define NL_RTAX_HOPLIMIT NL_RTAX_HOPLIMIT
+ NL_RTAX_INITCWND,
+#define NL_RTAX_INITCWND NL_RTAX_INITCWND
+ NL_RTAX_FEATURES,
+#define NL_RTAX_FEATURES NL_RTAX_FEATURES
+ NL_RTAX_RTO_MIN,
+#define NL_RTAX_RTO_MIN NL_RTAX_RTO_MIN
+ NL_RTAX_INITRWND,
+#define NL_RTAX_INITRWND NL_RTAX_INITRWND
+ NL_RTAX_QUICKACK,
+#define NL_RTAX_QUICKACK NL_RTAX_QUICKACK
+ NL_RTAX_CC_ALGO,
+#define NL_RTAX_CC_ALGO NL_RTAX_CC_ALGO
+ NL_RTAX_FASTOPEN_NO_COOKIE,
+#define NL_RTAX_FASTOPEN_NO_COOKIE NL_RTAX_FASTOPEN_NO_COOKIE
+ __NL_RTAX_MAX
+};
+#define NL_RTAX_MAX (__NL_RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1)
+#define RTAX_FEATURE_TIMESTAMP (1 << 2)
+#define RTAX_FEATURE_ALLFRAG (1 << 3)
+
+#define RTAX_FEATURE_MASK \
+ (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG)
+
+#ifndef _KERNEL
+/*
+ * RTAX_* space clashes with rtsock namespace.
+ * Use NL_RTAX_ prefix in the kernel and map to
+ * RTAX_ for userland.
+ */
+#define RTAX_UNSPEC NL_RTAX_UNSPEC
+#define RTAX_LOCK NL_RTAX_LOCK
+#define RTAX_MTU NL_RTAX_MTU
+#define RTAX_WINDOW NL_RTAX_WINDOW
+#define RTAX_RTT NL_RTAX_RTT
+#define RTAX_RTTVAR NL_RTAX_RTTVAR
+#define RTAX_SSTHRESH NL_RTAX_SSTHRESH
+#define RTAX_CWND NL_RTAX_CWND
+#define RTAX_ADVMSS NL_RTAX_ADVMSS
+#define RTAX_REORDERING NL_RTAX_REORDERING
+#define RTAX_HOPLIMIT NL_RTAX_HOPLIMIT
+#define RTAX_INITCWND NL_RTAX_INITCWND
+#define RTAX_FEATURES NL_RTAX_FEATURES
+#define RTAX_RTO_MIN NL_RTAX_RTO_MIN
+#define RTAX_INITRWND NL_RTAX_INITRWND
+#define RTAX_QUICKACK NL_RTAX_QUICKACK
+#define RTAX_CC_ALGO NL_RTAX_CC_ALGO
+#define RTAX_FASTOPEN_NO_COOKIE NL_RTAX_FASTOPEN_NO_COOKIE
+#endif
+
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle nexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+
+#endif
\ No newline at end of file
Index: sys/sys/domain.h
===================================================================
--- sys/sys/domain.h
+++ sys/sys/domain.h
@@ -71,11 +71,13 @@
/* dom_flags */
#define DOMF_SUPPORTED 0x0001 /* System supports this domain. */
#define DOMF_INITED 0x0002 /* Initialized in the default vnet. */
+#define DOMF_UNLOADABLE 0x0004 /* Can be unloaded */
#ifdef _KERNEL
extern int domain_init_status;
extern struct domain *domains;
void domain_add(void *);
+void domain_remove(void *);
void domain_init(void *);
#ifdef VIMAGE
void vnet_domain_init(void *);
@@ -85,6 +87,8 @@
#define DOMAIN_SET(name) \
SYSINIT(domain_add_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_FIRST, domain_add, & name ## domain); \
+ SYSUNINIT(domain_remove_ ## name, SI_SUB_PROTO_DOMAIN, \
+ SI_ORDER_FIRST, domain_remove, & name ## domain); \
SYSINIT(domain_init_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_SECOND, domain_init, & name ## domain);
#endif /* _KERNEL */
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -264,6 +264,7 @@
#define AF_ARP 35
#define AF_BLUETOOTH 36 /* Bluetooth sockets */
#define AF_IEEE80211 37 /* IEEE 802.11 protocol */
+#define AF_NETLINK 38 /* Netlink protocol */
#define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
#define AF_HYPERV 43 /* HyperV sockets */
@@ -389,6 +390,7 @@
#define PF_ARP AF_ARP
#define PF_BLUETOOTH AF_BLUETOOTH
#define PF_IEEE80211 AF_IEEE80211
+#define PF_NETLINK AF_NETLINK
#define PF_INET_SDP AF_INET_SDP
#define PF_INET6_SDP AF_INET6_SDP
Index: tests/sys/net/routing/netlink.py
===================================================================
--- /dev/null
+++ tests/sys/net/routing/netlink.py
@@ -0,0 +1,1076 @@
+#!/usr/local/bin/python3
+
+from ctypes import *
+import socket
+import os
+import sys
+import unittest
+import struct
+
+from enum import Enum, auto
+
+from typing import List, Callable, Dict, NamedTuple, Optional
+
+
+def roundup2(val: int, num: int) -> int:
+ if val % num:
+ return (val | (num - 1)) + 1
+ else:
+ return val
+
+
+def align4(val: int) -> int:
+ return roundup2(val, 4)
+
+
+class SockaddrNl(Structure):
+ _fields_ = [
+ ("nl_len", c_ubyte),
+ ("nl_family", c_ubyte),
+ ("nl_pad", c_ushort),
+ ("nl_pid", c_uint),
+ ("nl_groups", c_uint),
+ ]
+
+
+class Nlmsghdr(Structure):
+ _fields_ = [
+ ("nlmsg_len", c_uint),
+ ("nlmsg_type", c_ushort),
+ ("nlmsg_flags", c_ushort),
+ ("nlmsg_seq", c_uint),
+ ("nlmsg_pid", c_uint),
+ ]
+
+
+class Nlmsgerr(Structure):
+ _fields_ = [
+ ("error", c_int),
+ ("msg", Nlmsghdr),
+ ]
+
+
+class RtattrType(Enum):
+ RTA_UNSPEC = 0
+ RTA_DST = auto()
+ RTA_SRC = auto()
+ RTA_IIF = auto()
+ RTA_OIF = auto()
+ RTA_GATEWAY = auto()
+ RTA_PRIORITY = auto()
+ RTA_PREFSRC = auto()
+ RTA_METRICS = auto()
+ RTA_MULTIPATH = auto()
+ RTA_PROTOINFO = auto()
+ RTA_FLOW = auto()
+ RTA_CACHEINFO = auto()
+ RTA_SESSION = auto()
+ RTA_MP_ALGO = auto()
+ RTA_TABLE = auto()
+ RTA_MARK = auto()
+ RTA_MFC_STATS = auto()
+ RTA_VIA = auto()
+ RTA_NEWDST = auto()
+ RTA_PREF = auto()
+ RTA_ENCAP_TYPE = auto()
+ RTA_ENCAP = auto()
+ RTA_EXPIRES = auto()
+ RTA_PAD = auto()
+ RTA_UID = auto()
+ RTA_TTL_PROPAGATE = auto()
+ RTA_IP_PROTO = auto()
+ RTA_SPORT = auto()
+ RTA_DPORT = auto()
+ RTA_NH_ID = auto()
+
+
+class NlMsgType(Enum):
+ NLMSG_NOOP = 1
+ NLMSG_ERROR = 2
+ NLMSG_DONE = 3
+ NLMSG_OVERRUN = 4
+
+
+class NlRtMsgType(Enum):
+ RTM_NEWLINK = 16
+ RTM_DELLINK = 17
+ RTM_GETLINK = 18
+ RTM_SETLINK = 19
+ RTM_NEWADDR = 20
+ RTM_DELADDR = 21
+ RTM_GETADDR = 22
+ RTM_NEWROUTE = 24
+ RTM_DELROUTE = 25
+ RTM_GETROUTE = 26
+ RTM_NEWNEIGH = 28
+ RTM_DELNEIGH = 27
+ RTM_GETNEIGH = 28
+ RTM_NEWRULE = 32
+ RTM_DELRULE = 33
+ RTM_GETRULE = 34
+ RTM_NEWQDISC = 36
+ RTM_DELQDISC = 37
+ RTM_GETQDISC = 38
+ RTM_NEWTCLASS = 40
+ RTM_DELTCLASS = 41
+ RTM_GETTCLASS = 42
+ RTM_NEWTFILTER = 44
+ RTM_DELTFILTER = 45
+ RTM_GETTFILTER = 46
+ RTM_NEWACTION = 48
+ RTM_DELACTION = 49
+ RTM_GETACTION = 50
+ RTM_NEWPREFIX = 52
+ RTM_GETMULTICAST = 58
+ RTM_GETANYCAST = 62
+ RTM_NEWNEIGHTBL = 64
+ RTM_GETNEIGHTBL = 66
+ RTM_SETNEIGHTBL = 67
+ RTM_NEWNDUSEROPT = 68
+ RTM_NEWADDRLABEL = 72
+ RTM_DELADDRLABEL = 73
+ RTM_GETADDRLABEL = 74
+ RTM_GETDCB = 78
+ RTM_SETDCB = 79
+ RTM_NEWNETCONF = 80
+ RTM_GETNETCONF = 82
+ RTM_NEWMDB = 84
+ RTM_DELMDB = 85
+ RTM_GETMDB = 86
+ RTM_NEWNSID = 88
+ RTM_DELNSID = 89
+ RTM_GETNSID = 90
+ RTM_NEWSTATS = 92
+ RTM_GETSTATS = 94
+
+
+class RtAttr(Structure):
+ _fields_ = [
+ ("rta_len", c_ushort),
+ ("rta_type", c_ushort),
+ ]
+
+
+class RtMsgHdr(Structure):
+ _fields_ = [
+ ("rtm_family", c_ubyte),
+ ("rtm_dst_len", c_ubyte),
+ ("rtm_src_len", c_ubyte),
+ ("rtm_tos", c_ubyte),
+ ("rtm_table", c_ubyte),
+ ("rtm_protocol", c_ubyte),
+ ("rtm_scope", c_ubyte),
+ ("rtm_type", c_ubyte),
+ ("rtm_flags", c_uint),
+ ]
+
+
+class RtMsgFlags(Enum):
+ RTM_F_NOTIFY = 0x100
+ RTM_F_CLONED = 0x200
+ RTM_F_EQUALIZE = 0x400
+ RTM_F_PREFIX = 0x800
+ RTM_F_LOOKUP_TABLE = 0x1000
+ RTM_F_FIB_MATCH = 0x2000
+ RTM_F_OFFLOAD = 0x4000
+ RTM_F_TRAP = 0x8000
+ RTM_F_OFFLOAD_FAILED = 0x20000000
+
+
+class AddressFamilyLinux(Enum):
+ AF_INET = socket.AF_INET
+ AF_INET6 = socket.AF_INET6
+ AF_NETLINK = 16
+
+
+class AddressFamilyBsd(Enum):
+ AF_INET = socket.AF_INET
+ AF_INET6 = socket.AF_INET6
+ AF_NETLINK = 38
+
+
+class NlmBaseFlags(Enum):
+ NLM_F_REQUEST = 0x01
+ NLM_F_MULTI = 0x02
+ NLM_F_ACK = 0x04
+ NLM_F_ECHO = 0x08
+ NLM_F_DUMP_INTR = 0x10
+ NLM_F_DUMP_FILTERED = 0x20
+
+# XXX: in python3.8 it is possible to
+# class NlmGetFlags(Enum, NlmBaseFlags):
+
+
+class NlmGetFlags(Enum):
+ NLM_F_ROOT = 0x100
+ NLM_F_MATCH = 0x200
+ NLM_F_ATOMIC = 0x400
+
+
+class NlmNewFlags(Enum):
+ NLM_F_REPLACE = 0x100
+ NLM_F_EXCL = 0x200
+ NLM_F_CREATE = 0x400
+ NLM_F_APPEND = 0x800
+
+
+class NlmDeleteFlags(Enum):
+ NLM_F_NONREC = 0x100
+
+
+class NlmAckFlags(Enum):
+ NLM_F_CAPPED = 0x100
+ NLM_F_ACK_TLVS = 0x200
+
+
+class RtScope(Enum):
+ RT_SCOPE_UNIVERSE = 0
+ RT_SCOPE_SITE = 200
+ RT_SCOPE_LINK = 253
+ RT_SCOPE_HOST = 254
+ RT_SCOPE_NOWHERE = 255
+
+
+class RtType(Enum):
+ RTN_UNSPEC = 0
+ RTN_UNICAST = auto()
+ RTN_LOCAL = auto()
+ RTN_BROADCAST = auto()
+ RTN_ANYCAST = auto()
+ RTN_MULTICAST = auto()
+ RTN_BLACKHOLE = auto()
+ RTN_UNREACHABLE = auto()
+ RTN_PROHIBIT = auto()
+ RTN_THROW = auto()
+ RTN_NAT = auto()
+ RTN_XRESOLVE = auto()
+
+
+class RtProto(Enum):
+ RTPROT_UNSPEC = 0
+ RTPROT_REDIRECT = 1
+ RTPROT_KERNEL = 2
+ RTPROT_BOOT = 3
+ RTPROT_STATIC = 4
+ RTPROT_GATED = 8
+ RTPROT_RA = 9
+ RTPROT_MRT = 10
+ RTPROT_ZEBRA = 11
+ RTPROT_BIRD = 12
+ RTPROT_DNROUTED = 13
+ RTPROT_XORP = 14
+ RTPROT_NTK = 15
+ RTPROT_DHCP = 16
+ RTPROT_MROUTED = 17
+ RTPROT_KEEPALIVED = 18
+ RTPROT_BABEL = 42
+ RTPROT_OPENR = 99
+ RTPROT_BGP = 186
+ RTPROT_ISIS = 187
+ RTPROT_OSPF = 188
+ RTPROT_RIP = 189
+ RTPROT_EIGRP = 192
+
+
+class NlRtaxType(Enum):
+ RTAX_UNSPEC = 0
+ RTAX_LOCK = auto()
+ RTAX_MTU = auto()
+ RTAX_WINDOW = auto()
+ RTAX_RTT = auto()
+ RTAX_RTTVAR = auto()
+ RTAX_SSTHRESH = auto()
+ RTAX_CWND = auto()
+ RTAX_ADVMSS = auto()
+ RTAX_REORDERING = auto()
+ RTAX_HOPLIMIT = auto()
+ RTAX_INITCWND = auto()
+ RTAX_FEATURES = auto()
+ RTAX_RTO_MIN = auto()
+ RTAX_INITRWND = auto()
+ RTAX_QUICKACK = auto()
+ RTAX_CC_ALGO = auto()
+ RTAX_FASTOPEN_NO_COOKIE = auto()
+
+
+class NlRtGroup(Enum):
+ RTNLGRP_NONE = 0
+ RTNLGRP_LINK = auto()
+ RTNLGRP_NOTIFY = auto()
+ RTNLGRP_NEIGH = auto()
+ RTNLGRP_TC = auto()
+ RTNLGRP_IPV4_IFADDR = auto()
+ RTNLGRP_IPV4_MROUTE = auto()
+ RTNLGRP_IPV4_ROUTE = auto()
+ RTNLGRP_IPV4_RULE = auto()
+ RTNLGRP_IPV6_IFADDR = auto()
+ RTNLGRP_IPV6_MROUTE = auto()
+ RTNLGRP_IPV6_ROUTE = auto()
+ RTNLGRP_IPV6_IFINFO = auto()
+ RTNLGRP_DECnet_IFADDR = auto()
+ RTNLGRP_NOP2 = auto()
+ RTNLGRP_DECnet_ROUTE = auto()
+ RTNLGRP_DECnet_RULE = auto()
+ RTNLGRP_NOP4 = auto()
+ RTNLGRP_IPV6_PREFIX = auto()
+ RTNLGRP_IPV6_RULE = auto()
+ RTNLGRP_ND_USEROPT = auto()
+ RTNLGRP_PHONET_IFADDR = auto()
+ RTNLGRP_PHONET_ROUTE = auto()
+ RTNLGRP_DCB = auto()
+ RTNLGRP_IPV4_NETCONF = auto()
+ RTNLGRP_IPV6_NETCONF = auto()
+ RTNLGRP_MDB = auto()
+ RTNLGRP_MPLS_ROUTE = auto()
+ RTNLGRP_NSID = auto()
+ RTNLGRP_MPLS_NETCONF = auto()
+ RTNLGRP_IPV4_MROUTE_R = auto()
+ RTNLGRP_IPV6_MROUTE_R = auto()
+ RTNLGRP_NEXTHOP = auto()
+ RTNLGRP_BRVLAN = auto()
+
+
+class IfinfoMsg(Structure):
+ _fields_ = [
+ ("ifi_family", c_ubyte),
+ ("__ifi_pad", c_ubyte),
+ ("ifi_type", c_ushort),
+ ("ifi_index", c_int),
+ ("ifi_flags", c_uint),
+ ("ifi_change", c_uint),
+ ]
+
+
class IflattrType(Enum):
    """IFLA_* attribute type numbers.

    Values are consecutive integers starting at 0, spelled out explicitly.
    """
    IFLA_UNSPEC = 0
    IFLA_ADDRESS = 1
    IFLA_BROADCAST = 2
    IFLA_IFNAME = 3
    IFLA_MTU = 4
    IFLA_LINK = 5
    IFLA_QDISC = 6
    IFLA_STATS = 7
    IFLA_COST = 8
    IFLA_PRIORITY = 9
    IFLA_MASTER = 10
    IFLA_WIRELESS = 11
    IFLA_PROTINFO = 12
    IFLA_TXQLEN = 13
    IFLA_MAP = 14
    IFLA_WEIGHT = 15
    IFLA_OPERSTATE = 16
    IFLA_LINKMODE = 17
    IFLA_LINKINFO = 18
    IFLA_NET_NS_PID = 19
    IFLA_IFALIAS = 20
    IFLA_NUM_VF = 21
    IFLA_VFINFO_LIST = 22
    IFLA_STATS64 = 23
    IFLA_VF_PORTS = 24
    IFLA_PORT_SELF = 25
    IFLA_AF_SPEC = 26
    IFLA_GROUP = 27
    IFLA_NET_NS_FD = 28
    IFLA_EXT_MASK = 29
    IFLA_PROMISCUITY = 30
    IFLA_NUM_TX_QUEUES = 31
    IFLA_NUM_RX_QUEUES = 32
    IFLA_CARRIER = 33
    IFLA_PHYS_PORT_ID = 34
    IFLA_CARRIER_CHANGES = 35
    IFLA_PHYS_SWITCH_ID = 36
    IFLA_LINK_NETNSID = 37
    IFLA_PHYS_PORT_NAME = 38
    IFLA_PROTO_DOWN = 39
    IFLA_GSO_MAX_SEGS = 40
    IFLA_GSO_MAX_SIZE = 41
    IFLA_PAD = 42
    IFLA_XDP = 43
    IFLA_EVENT = 44
    IFLA_NEW_NETNSID = 45
    IFLA_IF_NETNSID = 46
    IFLA_CARRIER_UP_COUNT = 47
    IFLA_CARRIER_DOWN_COUNT = 48
    IFLA_NEW_IFINDEX = 49
    IFLA_MIN_MTU = 50
    IFLA_MAX_MTU = 51
    IFLA_PROP_LIST = 52
    IFLA_ALT_IFNAME = 53
    IFLA_PERM_ADDRESS = 54
    IFLA_PROTO_DOWN_REASON = 55
+
+
class IfaddrMsg(Structure):
    """ctypes layout of the fixed header read by NetlinkIfaMessage."""

    _fields_ = [
        ("ifa_family", c_ubyte),
        ("ifa_prefixlen", c_ubyte),
        ("ifa_flags", c_ubyte),
        ("ifa_scope", c_ubyte),
        ("ifa_index", c_uint),
    ]
+
+
class IfattrType(Enum):
    """IFA_* attribute type numbers, consecutive integers starting at 0."""
    IFA_UNSPEC = 0
    IFA_ADDRESS = 1
    IFA_LOCAL = 2
    IFA_LABEL = 3
    IFA_BROADCAST = 4
    IFA_ANYCAST = 5
    IFA_CACHEINFO = 6
    IFA_MULTICAST = 7
    IFA_FLAGS = 8
    IFA_RT_PRIORITY = 9
    IFA_TARGET_NETNSID = 10
+
+
class NlConst:
    """Numeric constants for opening the netlink socket."""
    # NOTE(review): 38 matches the nl_family used in the commented-out
    # bind code in Nlsock.set_groups -- confirm against the target OS.
    AF_NETLINK = 38
    # Protocol number passed as the third argument of socket.socket().
    NETLINK_ROUTE = 0
+
+
class NlHelper():
    """Translate numeric netlink constants to symbolic names and back.

    The constant classes are expected to expose public attributes whose
    objects carry a ``.value`` (enum members).  Value->name maps are built
    lazily per class and cached in ``self._pmap``.
    """

    def __init__(self):
        self._pmap = {}
        self._af_cls = self.get_af_cls()

    def get_af_cls(self):
        """Return the address-family constant class for the running OS."""
        if sys.platform.startswith("freebsd"):
            return AddressFamilyBsd
        return AddressFamilyLinux

    @staticmethod
    def _public_members(cls):
        """Yield ``(name, value)`` for every public attribute of ``cls``."""
        for prop in dir(cls):
            if not prop.startswith("_"):
                yield prop, getattr(cls, prop).value

    def get_propmap(self, cls):
        """Return the cached ``{value: name}`` map for ``cls``."""
        if cls not in self._pmap:
            self._pmap[cls] = {
                val: name for name, val in self._public_members(cls)
            }
        return self._pmap[cls]

    def get_name_propmap(self, cls):
        """Return a freshly built ``{name: value}`` map for ``cls``."""
        return dict(self._public_members(cls))

    def get_attr_byval(self, cls, attr_val):
        """Return the name in ``cls`` with value ``attr_val``, or None."""
        return self.get_propmap(cls).get(attr_val)

    def get_nlmsg_name(self, val):
        """Return the message-type name for ``val`` or ``"msg#<val>"``."""
        for cls in [NlRtMsgType, NlMsgType]:
            v = self.get_attr_byval(cls, val)
            if v is not None:
                return v
        return "msg#{}".format(val)

    def get_af_name(self, family):
        """Return the address-family name for ``family`` or ``"af#<n>"``."""
        v = self.get_attr_byval(self._af_cls, family)
        if v is not None:
            return v
        return "af#{}".format(family)

    def get_af_value(self, family_str: str) -> "Optional[int]":
        """Return the numeric value of ``family_str``; None when unknown."""
        return self.get_name_propmap(self._af_cls).get(family_str)

    def get_rta_name(self, val):
        """Return the RtattrType name with value ``val``, or None."""
        return self.get_attr_byval(RtattrType, val)

    def get_bitmask_map(self, cls, val):
        """Split ``val`` into its set bits and name each of them.

        Returns ``{bit: name}`` in ascending bit order; bits without a
        constant in ``cls`` are rendered with ``hex(bit)``.

        Raises:
            ValueError: ``val`` is negative.  A negative int has infinitely
                many set bits, so the scan below would never finish.
        """
        if val < 0:
            raise ValueError("negative bitmask: {}".format(val))
        propmap = self.get_propmap(cls)
        ret = {}
        bit = 1
        while val:
            if val & bit:
                ret[bit] = propmap.get(bit, hex(bit))
                val &= ~bit
            bit <<= 1
        return ret

    def get_bitmask_str(self, cls, val):
        """Return the names of the bits set in ``val`` joined by commas."""
        return ",".join(self.get_bitmask_map(cls, val).values())

    def get_nlm_flags_str(self, msg_str: str, reply: bool, val):
        """Decode header flags using the flag set that fits the message.

        Replies use NlmAckFlags; otherwise the flag class is chosen from
        the RTM_GET/RTM_DEL/RTM_NEW prefix of ``msg_str``, falling back to
        NlmBaseFlags.
        """
        if reply:
            return self.get_bitmask_str(NlmAckFlags, val)
        if msg_str.startswith("RTM_GET"):
            return self.get_bitmask_str(NlmGetFlags, val)
        elif msg_str.startswith("RTM_DEL"):
            return self.get_bitmask_str(NlmDeleteFlags, val)
        elif msg_str.startswith("RTM_NEW"):
            return self.get_bitmask_str(NlmNewFlags, val)
        else:
            return self.get_bitmask_str(NlmBaseFlags, val)
+
+
class BaseRtAttr(object):
    """Generic routing attribute: a 4-byte (len, type) header plus payload.

    Subclasses override ``_validate``/``_parse`` to decode the payload and
    ``_print_attr_value`` to render it.

    Args:
        parent: owning message; supplies ``helper`` and ``attr_enum``.
        rta_type: raw type field, possibly carrying the two flag bits.
        rta_len: attribute length taken from the header.
        data: header plus payload bytes, or None for an attribute that
            carries no data.
    """

    def __init__(self, parent, rta_type, rta_len, data=None):
        self.parent = parent
        self.helper = parent.helper
        self.attr_enum = parent.attr_enum
        # Keep the unmasked type so a data-less attribute can be serialised.
        self._raw_rta_type = rta_type
        # Only bits 15 and 14 are flags; the remaining 14 bits are the
        # type.  The previous 0x3f mask truncated every type >= 64.
        self.rta_type = rta_type & 0x3fff
        self.is_nested = rta_type & (1 << 15)
        self.network_byte_order = rta_type & (1 << 14)
        self.rta_len = rta_len
        self.rta_type_str = self.helper.get_attr_byval(self.attr_enum, self.rta_type)  # noqa: E501
        # Always defined, so printing/serialising works without data too.
        self._orig_data = data
        if data is not None:
            self._validate(data)
            self._parse(data)

    def print_attribute(self, prepend=""):
        """Print one line describing the attribute, prefixed by ``prepend``."""
        if self.rta_type_str:
            type_str = self.rta_type_str
        else:
            type_str = "rta#{}".format(self.rta_type)
        print("{}rta_len={} rta_type={}({}){}".format(prepend,
                                                    self.rta_len,
                                                    type_str,
                                                    self.rta_type,
                                                    self._print_attr_value())
              )

    def _print_attr_value(self):
        """Return the payload as a hex dump; empty string without data."""
        if self._orig_data is None:
            return ""
        return " [" + " ".join(["{:02X}".format(b) for b in self._orig_data[4:]]) + "]"  # noqa: E501

    @classmethod
    def from_bytes(cls, parent, data):
        """Build an attribute from ``data``, which starts at its header.

        Raises:
            ValueError: ``data`` is shorter than the header, or the
                header's ``rta_len`` is smaller than the header itself or
                larger than the bytes available.
        """
        if len(data) < sizeof(RtAttr):
            raise ValueError("length less than rtattr header")
        rta_hdr = RtAttr.from_buffer_copy(data)
        if rta_hdr.rta_len < sizeof(RtAttr):
            raise ValueError("rta_len {} is shorter than the rtattr header".format(  # noqa: E501
                rta_hdr.rta_len))
        if rta_hdr.rta_len > len(data):
            raise ValueError("rta_len {} exceeds the {} bytes available".format(  # noqa: E501
                rta_hdr.rta_len, len(data)))
        self = cls(parent, rta_hdr.rta_type, rta_hdr.rta_len, data[:rta_hdr.rta_len])  # noqa: E501
        # XXX: nested
        return self

    def __bytes__(self):
        """Serialise the attribute, zero-padded to a 4-byte boundary."""
        ret = self._orig_data
        if ret is None:
            # No payload: emit a bare header (u16 length, u16 type).
            return struct.pack("@HH", 4, self._raw_rta_type)
        if align4(len(ret)) != len(ret):
            ret += bytes(align4(len(ret)) - len(ret))
        return ret

    def _validate(self, data):
        """Hook: raise ValueError when ``data`` is malformed."""
        pass

    def _parse(self, data):
        """Hook: decode ``data`` into instance attributes."""
        pass
+
+
class RtAttrIp(BaseRtAttr):
    """Attribute whose payload is a 4-byte IPv4 or 16-byte IPv6 address."""

    def _validate(self, data):
        # The payload follows the 4-byte attribute header.
        payload_len = len(data) - 4
        if payload_len not in (4, 16):
            raise ValueError("Error validating attr {}: rta_len is not valid".format(  # noqa: E501
                self.rta_type_str))

    def _parse(self, data):
        if len(data) - 4 == 4:
            self.family = socket.AF_INET
            raw_addr = data[4:8]
        else:
            self.family = socket.AF_INET6
            raw_addr = data[4:20]
        self.addr = socket.inet_ntop(self.family, raw_addr)

    def _print_attr_value(self):
        return " addr={}".format(self.addr)
+
+
class RtAttrU32(BaseRtAttr):
    """Attribute carrying one native-endian unsigned 32-bit integer."""

    def _validate(self, data):
        # 4-byte header + 4-byte value
        if len(data) != 8:
            raise ValueError("Error validating attr {}: rta_len is not valid".format(  # noqa: E501
                self.rta_type_str))

    def _parse(self, data):
        (self.value,) = struct.unpack("@I", data[4:8])

    def _print_attr_value(self):
        return " value={}".format(self.value)
+
+
class RtAttrIfindex(RtAttrU32):
    """32-bit attribute holding an interface index."""

    def _print_attr_value(self):
        # Show the interface name when the index resolves locally;
        # otherwise fall back to the bare index.
        try:
            return " iface={}(#{})".format(socket.if_indextoname(self.value),
                                           self.value)
        except OSError:
            return " iface=if#{}".format(self.value)
+
+
class RtAttrTable(RtAttrU32):
    """32-bit attribute printed as a routing table number."""

    def _print_attr_value(self):
        return " rtable=" + str(self.value)
+
+
class RtAttrNhId(RtAttrU32):
    """32-bit attribute printed as a nexthop id."""

    def _print_attr_value(self):
        return " nh_id=" + str(self.value)
+
+
class RtAttrVia(BaseRtAttr):
    """Attribute holding an address-family byte followed by an address.

    Payload layout as decoded here (after the 4-byte attribute header):
    byte 4 is the family, the IPv4/IPv6 address starts at byte 5.
    NOTE(review): this assumes a one-byte family field -- confirm for
    platforms where the family field is wider.
    """

    def _validate(self, data):
        data_len = len(data) - 4
        if data_len <= 0:
            raise ValueError("Error validating attr {}: empty data".format(self.rta_type_str))  # noqa: E501
        # The family is the first payload byte.  The previous code indexed
        # the integer length (data_len[0]) and always raised TypeError.
        family = int(data[4])
        if family not in (socket.AF_INET, socket.AF_INET6):
            raise ValueError("Error validating attr {}: unsupported AF {}".format(  # noqa: E501
                self.rta_type_str, family))
        if family == socket.AF_INET:
            expected_len = 1 + 4
        else:
            expected_len = 1 + 16
        if data_len != expected_len:
            raise ValueError("Error validating attr {}: expected len {} got {}".format(  # noqa: E501
                self.rta_type_str, expected_len, data_len))

    def _parse(self, data):
        # Same fix as in _validate: read the family from the payload.
        self.family = int(data[4])
        if self.family == socket.AF_INET:
            self.addr = socket.inet_ntop(self.family, data[5:9])
        else:
            self.addr = socket.inet_ntop(self.family, data[5:21])

    def _print_attr_value(self):
        return ", via={}".format(self.addr)
+
+
class RtAttrStr(BaseRtAttr):
    """Attribute whose payload is a UTF-8 string.

    String attributes are transmitted NUL-terminated, so the terminator
    is stripped before the text is stored in ``self.str``.
    """

    def _validate(self, data):
        # Only decoding errors mean "bad string"; anything else is a
        # programming error and should surface unchanged.
        try:
            data[4:].decode("utf-8")
        except UnicodeDecodeError as e:
            raise ValueError("wrong utf-8 string") from e

    def _parse(self, data):
        # Drop trailing NUL terminator(s) so they do not show up as
        # "\x00" in printed output.
        self.str = data[4:].decode("utf-8").rstrip("\x00")

    def _print_attr_value(self):
        return " str=\"{}\"".format(self.str)
+
+
# Attribute name -> decoder class, one table per message family.
# Attributes that are not listed are handled by BaseRtAttr (hex dump).

# RTA_* attributes.
rta_class_map = dict(
    RTA_DST=RtAttrIp,
    RTA_SRC=RtAttrIp,
    RTA_IIF=RtAttrIfindex,
    RTA_OIF=RtAttrIfindex,
    RTA_GATEWAY=RtAttrIp,
    RTA_TABLE=RtAttrTable,
    RTA_VIA=RtAttrVia,
    RTA_NH_ID=RtAttrNhId,
)


# IFLA_* attributes.
ifla_class_map = dict(
    IFLA_MTU=RtAttrU32,
)

# IFA_* attributes.
ifa_class_map = dict(
    IFA_ADDRESS=RtAttrIp,
    IFA_LOCAL=RtAttrIp,
    IFA_LABEL=RtAttrStr,
    IFA_BROADCAST=RtAttrIp,
    IFA_ANYCAST=RtAttrIp,
    IFA_FLAGS=RtAttrU32,
)
+
+
class BaseNetlinkMessage(object):
    """A netlink message reduced to its generic header.

    Used for message types without a dedicated parser and as the base
    class of the typed messages.
    """

    def __init__(self, helper, nlmsg_type):
        self.nlmsg_type = nlmsg_type
        # TestCase instance used only for its assert* helpers.
        self.ut = unittest.TestCase()
        self.rta_list = []
        self._orig_data = None
        self.helper = helper
        self.nl_hdr = Nlmsghdr(nlmsg_type=nlmsg_type)

    def assertEqual(self, a, b, msg=None):
        self.ut.assertEqual(a, b, msg)

    def assertNotEqual(self, a, b, msg=None):
        self.ut.assertNotEqual(a, b, msg)

    @staticmethod
    def parse_nl_header(data: bytes):
        """Return ``(header, header_size)`` parsed from the start of ``data``.

        Raises:
            ValueError: ``data`` is shorter than the header.
        """
        if len(data) < sizeof(Nlmsghdr):
            raise ValueError("length less than netlink message header")
        return Nlmsghdr.from_buffer_copy(data), sizeof(Nlmsghdr)

    def is_reply(self, hdr):
        """True when ``hdr`` describes an NLMSG_ERROR message."""
        return hdr.nlmsg_type == NlMsgType.NLMSG_ERROR.value

    def print_nl_header(self, hdr, prepend=""):
        """Print the header fields of ``hdr`` on one line."""
        # len=44, type=RTM_DELROUTE, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1641163704, pid=0  # noqa: E501
        is_reply = self.is_reply(hdr)
        msg_name = self.helper.get_nlmsg_name(hdr.nlmsg_type)
        print("{}len={}, type={}, flags={}(0x{:X}), seq={}, pid={}".format(
            prepend,
            hdr.nlmsg_len,
            msg_name,
            self.helper.get_nlm_flags_str(msg_name, is_reply, hdr.nlmsg_flags),  # noqa: E501
            hdr.nlmsg_flags,
            hdr.nlmsg_seq,
            hdr.nlmsg_pid
        ))

    @classmethod
    def from_bytes(cls, helper, data):
        """Parse ``data`` into a message; dumps the bytes on failure.

        Raises:
            ValueError: the header could not be parsed.
        """
        try:
            hdr, hdrlen = BaseNetlinkMessage.parse_nl_header(data)
            self = cls(helper, hdr.nlmsg_type)
            self._orig_data = data
            self.nl_hdr = hdr
        except ValueError as e:
            print("Failed to parse nl header: {}".format(e))
            # Pass a description explicitly: omitting it used to raise
            # TypeError here and hide the real parse error.
            cls.print_as_bytes(data, "msg dump")
            raise
        return self

    def print_message(self):
        self.print_nl_header(self.nl_hdr)

    @staticmethod
    def print_as_bytes(data: bytes, descr: str = "data"):
        """Hex-dump ``data``, 16 bytes per row, under a ``descr`` banner.

        ``descr`` is optional so single-argument calls keep working.
        """
        print("===vv {} (len:{:3d}) vv===".format(descr, len(data)))
        step = 16
        for off in range(0, len(data), step):
            print("".join(" {:02X}".format(b) for b in data[off:off + step]))
        print("--------------------")
+
+
class NetlinkErrorMessage(BaseNetlinkMessage):
    """NLMSG_ERROR message: generic header followed by an error header."""

    messages = [NlMsgType.NLMSG_ERROR.value]

    def __init__(self, helper, nlmsg_type, error=0):
        # ``error`` is optional because from_bytes() constructs messages
        # as cls(helper, nlmsg_type); a required third argument made every
        # received error message fail with TypeError.
        super().__init__(helper, nlmsg_type)
        self.err_hdr = Nlmsgerr()
        # NOTE(review): assumes Nlmsgerr exposes an ``error`` field next to
        # ``msg`` -- confirm against its definition.
        self.err_hdr.error = error

    @classmethod
    def from_bytes(cls, helper, data):
        """Parse the generic header, then the error header after it.

        Raises:
            ValueError: ``data`` is too short for either header.
        """
        self = super().from_bytes(helper, data)
        offset = sizeof(Nlmsghdr)
        if len(data) - offset < sizeof(Nlmsgerr):
            raise ValueError("length less than nlmsgerr header")
        self.err_hdr = Nlmsgerr.from_buffer_copy(data[offset:])
        return self

    def print_error_header(self, errhdr, prepend=""):
        # The format string has two placeholders; the error value was
        # missing, which raised IndexError.
        print("{}error={}, ".format(prepend, errhdr.error), end="")
        self.print_nl_header(errhdr.msg, prepend)

    def print_message(self, prepend=""):
        # Was self.nl_nhr: a typo for nl_hdr that raised AttributeError.
        self.print_nl_header(self.nl_hdr, prepend)
        self.print_error_header(self.err_hdr, prepend + " ")
+
+
class BaseNetlinkRtMessage(BaseNetlinkMessage):
    """Base for messages shaped as: header, family header, attributes.

    Subclasses provide ``attr_class_map`` (attribute name -> decoder),
    ``attr_enum`` (attribute numbering), ``parse_base_header`` and
    ``print_base_header``.
    """
    attr_class_map = {}
    attr_enum = None

    def __init__(self, helper, nlm_type):
        super().__init__(helper, nlm_type)
        self.base_hdr = None

    def parse_rta_list(self, data: bytes) -> "Tuple[List[BaseRtAttr], int]":
        """Parse consecutive attributes from ``data``.

        Returns:
            ``(attributes, consumed)``; ``consumed`` counts 4-byte padding
            after every attribute.

        Raises:
            ValueError: a truncated header or a zero-length attribute.
        """
        ret = []
        offset = 0
        while offset < len(data):
            remaining = len(data) - offset
            if offset + 4 > len(data):
                raise ValueError("only {} bytes remaining".format(remaining))  # noqa: E501
            rta_hdr = RtAttr.from_buffer_copy(data[offset:])
            # Reject zero length before parsing: it would never advance.
            if rta_hdr.rta_len == 0:
                raise ValueError("empty rta len, {} bytes remaining".format(remaining))  # noqa: E501
            # Drop the two flag bits so a flagged attribute still finds
            # its decoder class.
            rta_type_str = self.helper.get_attr_byval(self.attr_enum, rta_hdr.rta_type & 0x3fff)  # noqa: E501
            cls = self.attr_class_map.get(rta_type_str, BaseRtAttr)
            rta = cls.from_bytes(self, data[offset:])
            offset += align4(rta.rta_len)
            ret.append(rta)
        return ret, offset

    @classmethod
    def from_bytes(cls, helper, data):
        """Parse a complete message; dumps the bytes on any failure.

        Raises:
            ValueError: a header or the attribute list is malformed, or
                bytes are left over after the last attribute.
        """
        try:
            hdr, hdrlen = BaseNetlinkMessage.parse_nl_header(data)
            self = cls(helper, hdr.nlmsg_type)
            self._orig_data = data
            self.nl_hdr = hdr
        except ValueError as e:
            print("Failed to parse nl header: {}".format(e))
            # A description is required; omitting it raised TypeError and
            # hid the parse error.
            cls.print_as_bytes(data, "msg dump")
            raise

        offset = align4(hdrlen)
        try:
            base_hdr, hdrlen = self.parse_base_header(data[offset:])
            self.base_hdr = base_hdr
            offset += align4(hdrlen)
        except ValueError as e:
            print("Failed to parse nl rt header: {}".format(e))
            cls.print_as_bytes(data, "msg dump")
            raise

        orig_offset = offset
        try:
            rta_list, rta_len = self.parse_rta_list(data[offset:])
            offset += rta_len
            if offset != len(data):
                raise ValueError("{} bytes left at the end of the packet".format(len(data) - offset))  # noqa: E501
            self.rta_list = rta_list
        except ValueError as e:
            print("Failed to parse nl rta attributes at offset {}: {}".format(orig_offset, e))  # noqa: E501
            cls.print_as_bytes(data, "msg dump")
            cls.print_as_bytes(data[orig_offset:], "failed block")
            raise
        return self

    def __bytes__(self):
        """Serialise the message and refresh ``nlmsg_len`` to match."""
        payload = bytes(self.base_hdr) + b"".join(bytes(rta) for rta in self.rta_list)  # noqa: E501
        self.nl_hdr.nlmsg_len = len(payload) + sizeof(Nlmsghdr)
        return bytes(self.nl_hdr) + payload

    def print_message(self):
        self.print_nl_header(self.nl_hdr)
        self.print_base_header(self.base_hdr, " ")
        for rta in self.rta_list:
            rta.print_attribute(" ")
+
+
class NetlinkRtMessage(BaseNetlinkRtMessage):
    """Route message (RTM_NEWROUTE / RTM_DELROUTE / RTM_GETROUTE)."""

    messages = [
        NlRtMsgType.RTM_NEWROUTE.value,
        NlRtMsgType.RTM_DELROUTE.value,
        NlRtMsgType.RTM_GETROUTE.value,
    ]
    attr_class_map = rta_class_map
    attr_enum = RtattrType

    def __init__(self, helper, nlm_type):
        super().__init__(helper, nlm_type)
        self.base_hdr = RtMsgHdr()

    def parse_base_header(self, data):
        """Return ``(RtMsgHdr, size)`` read from the start of ``data``."""
        hdr_size = sizeof(RtMsgHdr)
        if len(data) < hdr_size:
            raise ValueError("length less than rtmsg header")
        return (RtMsgHdr.from_buffer_copy(data), hdr_size)

    def print_base_header(self, hdr, prepend=""):
        """Print the route header; enum fields appear as name(number)."""
        name_of = self.helper.get_attr_byval
        fields = (
            prepend,
            self.helper.get_af_name(hdr.rtm_family),
            hdr.rtm_dst_len,
            hdr.rtm_src_len,
            hdr.rtm_tos,
            hdr.rtm_table,
            name_of(RtProto, hdr.rtm_protocol),
            hdr.rtm_protocol,
            name_of(RtScope, hdr.rtm_scope),
            hdr.rtm_scope,
            name_of(RtType, hdr.rtm_type),
            hdr.rtm_type,
            self.helper.get_bitmask_str(RtMsgFlags, hdr.rtm_flags),
            hdr.rtm_flags,
        )
        print("{}family={}, dst_len={}, src_len={}, tos={}, table={}, protocol={}({}), scope={}({}), type={}({}), flags={}({})".format(*fields))  # noqa: E501
+
+
class NetlinkIflaMessage(BaseNetlinkRtMessage):
    """Link message (RTM_NEWLINK / RTM_DELLINK / RTM_GETLINK)."""

    messages = [
        NlRtMsgType.RTM_NEWLINK.value,
        NlRtMsgType.RTM_DELLINK.value,
        NlRtMsgType.RTM_GETLINK.value,
    ]
    attr_class_map = ifla_class_map
    attr_enum = IflattrType

    def __init__(self, helper, nlm_type):
        super().__init__(helper, nlm_type)
        self.base_hdr = IfinfoMsg()

    def parse_base_header(self, data):
        """Return ``(IfinfoMsg, size)`` read from the start of ``data``."""
        hdr_size = sizeof(IfinfoMsg)
        if len(data) < hdr_size:
            raise ValueError("length less than IfinfoMsg header")
        return (IfinfoMsg.from_buffer_copy(data), hdr_size)

    def print_base_header(self, hdr, prepend=""):
        """Print the link header fields on one line."""
        fields = (
            prepend,
            self.helper.get_af_name(hdr.ifi_family),
            hdr.ifi_type,
            hdr.ifi_index,
            hdr.ifi_flags,
            hdr.ifi_change,
        )
        print("{}family={}, ifi_type={}, ifi_index={}, ifi_flags={}, ifi_change={}".format(*fields))  # noqa: E501
+
+
class NetlinkIfaMessage(BaseNetlinkRtMessage):
    """Address message (RTM_NEWADDR / RTM_DELADDR / RTM_GETADDR)."""

    messages = [
        NlRtMsgType.RTM_NEWADDR.value,
        NlRtMsgType.RTM_DELADDR.value,
        NlRtMsgType.RTM_GETADDR.value,
    ]
    attr_class_map = ifa_class_map
    attr_enum = IfattrType

    def __init__(self, helper, nlm_type):
        super().__init__(helper, nlm_type)
        self.base_hdr = IfaddrMsg()

    def parse_base_header(self, data):
        """Return ``(IfaddrMsg, size)`` read from the start of ``data``."""
        hdr_size = sizeof(IfaddrMsg)
        if len(data) < hdr_size:
            raise ValueError("length less than IfaddrMsg header")
        return (IfaddrMsg.from_buffer_copy(data), hdr_size)

    def print_base_header(self, hdr, prepend=""):
        """Print the address header fields on one line."""
        fields = (
            prepend,
            self.helper.get_af_name(hdr.ifa_family),
            hdr.ifa_prefixlen,
            hdr.ifa_flags,
            hdr.ifa_scope,
            hdr.ifa_index,
        )
        print("{}family={}, ifa_prefixlen={}, ifa_flags={}, ifa_scope={}, ifa_index={}".format(*fields))  # noqa: E501
+
+
class Nlsock():
    """Raw NETLINK_ROUTE socket with message framing and parsing.

    Received bytes accumulate in ``self._data``; ``read_message`` cuts
    one message at a time off the front and parses it with the class
    registered for its type in ``self.msgmap``.
    """

    def __init__(self, helper):
        self.helper = helper
        self.sock_fd = self._setup_netlink()
        self._data = bytes()
        self.rtm_seq = 1
        self.pid = os.getpid()
        self.msgmap = self.build_msgmap()
        # NlRtGroup values are ordinals, not bit flags: OR-ing 7 and 11
        # produced 15, which names neither group.  Join them one by one.
        # NOTE(review): assumes socket option 1 adds a single group
        # membership per call -- confirm against the kernel side.
        for group in (NlRtGroup.RTNLGRP_IPV4_ROUTE,
                      NlRtGroup.RTNLGRP_IPV6_ROUTE):
            self.set_groups(group.value)

    def build_msgmap(self):
        """Return ``{message type: parser class}`` for all typed parsers."""
        # NetlinkIflaMessage was missing, so link messages were only ever
        # parsed down to the generic header.
        classes = [NetlinkRtMessage, NetlinkIflaMessage, NetlinkIfaMessage,
                   NetlinkErrorMessage]
        xmap = {}
        for cls in classes:
            for message in cls.messages:
                xmap[message] = cls
        return xmap

    def get_seq(self):
        """Return the next request sequence number (post-increment)."""
        ret = self.rtm_seq
        self.rtm_seq += 1
        return ret

    def _setup_netlink(self) -> int:
        family = self.helper.get_af_value("AF_NETLINK")
        s = socket.socket(family, socket.SOCK_RAW, NlConst.NETLINK_ROUTE)
        return s

    def set_groups(self, mask: int):
        """Pass ``mask`` to socket option 1 at SOL_SOCKET level."""
        self.sock_fd.setsockopt(socket.SOL_SOCKET, 1, mask)
        # snl = SockaddrNl(nl_len = sizeof(SockaddrNl), nl_family=38,
        #                  nl_pid=self.pid, nl_groups=mask)
        # xbuffer = create_string_buffer(sizeof(SockaddrNl))
        # memmove(xbuffer, addressof(snl), sizeof(SockaddrNl))
        # k = struct.pack("@BBHII", 12, 38, 0, self.pid, mask)
        # self.sock_fd.bind(k)

    def write_message(self, msg):
        """Print ``msg`` and write it to the socket (best effort)."""
        print("vvvvvvvv OUT vvvvvvvv")
        msg.print_message()
        # Serialise once and reuse the result for write and error report.
        msg_bytes = bytes(msg)
        try:
            os.write(self.sock_fd.fileno(), msg_bytes)
        except OSError as e:
            print("write({}) -> {}".format(len(msg_bytes), e))

    def parse_message(self, data: bytes):
        """Parse one complete message using the class for its type."""
        if len(data) < sizeof(Nlmsghdr):
            raise Exception("Short read from nl: {} bytes".format(len(data)))
        hdr = Nlmsghdr.from_buffer_copy(data)
        nlmsg_type = hdr.nlmsg_type
        cls = self.msgmap.get(nlmsg_type)
        if not cls:
            cls = BaseNetlinkMessage
        return cls.from_bytes(self.helper, data)

    def write_data(self, data: bytes):
        self.sock_fd.send(data)

    def read_data(self):
        """Receive until the buffer holds at least one message header.

        Returns the chunk received last.

        Raises:
            EOFError: the socket returned no data.
        """
        # The previous loop fell through to an undefined ``seq`` variable
        # (NameError) whenever one recv() was not enough.
        while True:
            data = self.sock_fd.recv(65535)
            if not data:
                raise EOFError("netlink socket returned no data")
            self._data += data
            if len(self._data) >= sizeof(Nlmsghdr):
                break
        return data

    def read_message(self):
        """Return the next parsed message, reading more data as needed.

        Raises:
            ValueError: the header announces a length smaller than itself.
        """
        if len(self._data) < sizeof(Nlmsghdr):
            self.read_data()
        hdr = Nlmsghdr.from_buffer_copy(self._data)
        if hdr.nlmsg_len < sizeof(Nlmsghdr):
            # Cutting such a message off would consume nothing and loop.
            raise ValueError("invalid nlmsg_len {}".format(hdr.nlmsg_len))
        while (hdr.nlmsg_len > len(self._data)):
            self.read_data()
        raw_msg = self._data[:hdr.nlmsg_len]
        # Messages sharing one buffer are 4-byte aligned: skip the padding
        # so the next header is read from the right offset.
        self._data = self._data[align4(hdr.nlmsg_len):]
        return self.parse_message(raw_msg)

    def fill_msg_seq(self, msg):
        """Stamp ``msg`` with a fresh sequence number and our pid."""
        msg.nl_hdr.nlmsg_seq = self.get_seq()
        msg.nl_hdr.nlmsg_pid = self.pid

    def _send_dump_request(self, msg):
        """Finish ``msg`` as a ROOT|MATCH request, echo it and send it."""
        flags = NlmGetFlags.NLM_F_ROOT.value | NlmGetFlags.NLM_F_MATCH.value
        self.fill_msg_seq(msg)
        msg.nl_hdr.nlmsg_flags = flags | NlmBaseFlags.NLM_F_REQUEST.value

        msg_bytes = bytes(msg)
        # Round-trip through the parser to show what is about to be sent.
        x = self.parse_message(msg_bytes)
        x.print_message()
        print(msg_bytes)
        self.write_data(msg_bytes)

    def request_ifaddrs(self, family):
        """Send an RTM_GETADDR dump request for ``family``."""
        msg = NetlinkIfaMessage(self.helper, NlRtMsgType.RTM_GETADDR.value)
        msg.base_hdr.ifa_family = family
        self._send_dump_request(msg)

    def request_routes(self, family):
        """Send an RTM_GETROUTE dump request for ``family``."""
        msg = NetlinkRtMessage(self.helper, NlRtMsgType.RTM_GETROUTE.value)
        msg.base_hdr.rtm_family = family
        self._send_dump_request(msg)
+
+
def main():
    """Request a route dump for family 0 and print messages forever."""
    nl = Nlsock(NlHelper())
    # nl.request_ifaddrs(socket.AF_INET)
    nl.request_routes(0)
    while True:
        msg = nl.read_message()
        print("")
        msg.print_message()


if __name__ == "__main__":
    main()

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 8, 11:52 PM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14545741
Default Alt Text
D36002.id108752.diff (223 KB)

Event Timeline