Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F101999520
D33265.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D33265.diff
View Options
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -310,12 +310,42 @@
VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);
-/* Table of ifnet by index. */
+/*
+ * Tables of ifnet by index.
+ *
+ * We keep a global if_table indexing all interfaces across all vnets. An
+ * interface's position in this table is immutable: it survives if_vmove().
+ * The table can therefore be used to serialize an ifnet pointer into an
+ * index and to restore the pointer from that index later.
+ *
+ * In addition, every vnet keeps its own V_ifindex_table. Its indices start
+ * at 1 and it is guaranteed to be populated without holes, so its maximum
+ * index always matches the number of interfaces in the vnet. It is used by
+ * userland-facing APIs such as multicast setsockopt(2) or the
+ * net.link.generic.ifdata sysctl MIB. Entries in this array don't point at
+ * the interface directly, but at its global table entry.
+ *
+ *                                  lo0      em0    epair0a    lo0    epair0b
+ *                                  /        /        /        /        /
+ * if_table                      [0   ]   [1   ]   [2   ]   [3   ]   [4   ]
+ *                                 |        |        |        |        |
+ * vnet0_ifindex_table [0 NULL]  [1   ]   [2   ]   [3   ]     |        |
+ * vnet1_ifindex_table [0 NULL]                             [1   ]   [2   ]
+ */
+
+static int if_idx;
+static int if_lim = 8;
+static struct ifindex_entry {
+ struct ifnet *ife_ifnet;
+ uint16_t ife_gencnt;
+} *if_table;
+
+/* Per-VNET API index. */
VNET_DEFINE_STATIC(int, if_index);
#define V_if_index VNET(if_index)
-VNET_DEFINE_STATIC(int, if_indexlim) = 8;
+VNET_DEFINE_STATIC(int, if_indexlim) = 2;
#define V_if_indexlim VNET(if_indexlim)
-VNET_DEFINE_STATIC(struct ifnet **, ifindex_table);
+VNET_DEFINE_STATIC(struct ifindex_entry **, ifindex_table);
#define V_ifindex_table VNET(ifindex_table)
SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
@@ -352,13 +382,15 @@
struct ifnet *
ifnet_byindex(int idx)
{
+ struct ifindex_entry *ife;
NET_EPOCH_ASSERT();
if (__predict_false(idx <= 0 || idx > V_if_index))
return (NULL);
- return (ck_pr_load_ptr(&V_ifindex_table[idx]));
+ ife = ck_pr_load_ptr(&V_ifindex_table[idx]);
+ return (ife == NULL ? NULL : ck_pr_load_ptr(&ife->ife_ifnet));
}
struct ifnet *
@@ -375,30 +407,72 @@
}
/*
- * Allocate an ifindex array entry.
+ * Allocate global and virtual ifindex entries for an ifnet.
+ * For a new interface ife is NULL. For an interface being if_vmove'd,
+ * ife points to its already allocated entry in the global index.
*/
static void
-ifindex_alloc(struct ifnet *ifp)
+ifindex_alloc(struct ifnet *ifp, struct ifindex_entry *ife)
{
u_short idx;
IFNET_WLOCK();
- /*
- * Try to find an empty slot below V_if_index. If we fail, take the
- * next slot.
- */
+
+ if (ife != NULL)
+ goto virtual;
+
+ /* First allocate index in the global table. */
+ for (idx = 0; idx <= if_idx; idx++)
+ if (if_table[idx].ife_ifnet == NULL)
+ break;
+ if (idx >= if_lim) { /* Need to grow. */
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ifindex_entry *new, *old;
+ int newlim;
+
+ newlim = if_lim << 1;
+ new = malloc(newlim * sizeof(*new), M_IFNET,
+ M_WAITOK | M_ZERO);
+ memcpy(new, if_table, if_lim * sizeof(*new));
+ old = if_table;
+ ck_pr_store_ptr(&if_table, new);
+ if_lim = newlim;
+
+ /* Repoint every vnet's index entries at the new global table. */
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ for (u_short i = 1; i <= V_if_index; i++) {
+ new = V_ifindex_table[i] - old + if_table;
+ ck_pr_store_ptr(&V_ifindex_table[i], new);
+ }
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+
+ epoch_wait_preempt(net_epoch_preempt);
+ free(old, M_IFNET);
+ }
+ if (idx > if_idx)
+ if_idx = idx;
+ ifp->if_idx = idx;
+ ifp->if_idxgen = if_table[idx].ife_gencnt;
+ ck_pr_store_ptr(&if_table[idx].ife_ifnet, ifp);
+
+ ife = &if_table[idx];
+virtual:
+ /* Second, allocate entry in V_ifindex_table. */
for (idx = 1; idx <= V_if_index; idx++) {
if (V_ifindex_table[idx] == NULL)
break;
}
-
- /* Catch if_index overflow. */
- if (idx >= V_if_indexlim) {
- struct ifnet **new, **old;
+ if (idx >= V_if_indexlim) { /* Need to grow. */
+ struct ifindex_entry **new, **old;
int newlim;
newlim = V_if_indexlim << 1;
- new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
+ new = malloc(newlim * sizeof(*new), M_IFNET,
+ M_WAITOK | M_ZERO);
memcpy(new, V_ifindex_table, V_if_indexlim * sizeof(*new));
old = V_ifindex_table;
ck_pr_store_ptr(&V_ifindex_table, new);
@@ -408,24 +482,11 @@
}
if (idx > V_if_index)
V_if_index = idx;
-
ifp->if_index = idx;
- ck_pr_store_ptr(&V_ifindex_table[idx], ifp);
+ ck_pr_store_ptr(&V_ifindex_table[idx], ife);
IFNET_WUNLOCK();
}
-static void
-ifindex_free(u_short idx)
-{
-
- IFNET_WLOCK_ASSERT();
-
- ck_pr_store_ptr(&V_ifindex_table[idx], NULL);
- while (V_if_index > 0 &&
- V_ifindex_table[V_if_index] == NULL)
- V_if_index--;
-}
-
struct ifaddr *
ifaddr_byindex(u_short idx)
{
@@ -447,6 +508,15 @@
* parameters.
*/
+static void
+if_init(const void *arg __unused)
+{
+
+ if_table = malloc(if_lim * sizeof(*if_table), M_IFNET,
+ M_WAITOK | M_ZERO);
+}
+SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
+
static void
vnet_if_init(const void *unused __unused)
{
@@ -606,7 +676,7 @@
ifp->if_get_counter = if_get_counter_default;
ifp->if_pcp = IFNET_PCP_NONE;
- ifindex_alloc(ifp);
+ ifindex_alloc(ifp, NULL);
return (ifp);
}
@@ -668,26 +738,18 @@
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
- /*
- * XXXGL: An interface index is really an alias to ifp pointer.
- * Why would we clear the alias now, and not in the deferred
- * context? Indeed there is nothing wrong with some network
- * thread obtaining ifp via ifnet_byindex() inside the network
- * epoch and then dereferencing ifp while we peform if_free(),
- * and after if_free() finished, too.
- *
- * The reason is the VIMAGE. For some reason it was designed
- * to require all sockets drained before destroying, but not all
- * ifnets. A vnet destruction calls if_vmove() on ifnet, which
- * causes ID change. But ID change and a possible misidentification
- * of an ifnet later is a lesser problem, as it doesn't crash kernel.
- * A worse problem is that removed interface may outlive the vnet it
- * belongs too! The if_free_deferred() would see ifp->if_vnet freed.
- */
CURVNET_SET_QUIET(ifp->if_vnet);
IFNET_WLOCK();
- MPASS(V_ifindex_table[ifp->if_index] == ifp);
- ifindex_free(ifp->if_index);
+ MPASS(V_ifindex_table[ifp->if_index]->ife_ifnet == ifp);
+ /* First, clear global index entry. */
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index]->ife_ifnet, NULL);
+ V_ifindex_table[ifp->if_index]->ife_gencnt++;
+ while (if_idx > 0 && if_table[if_idx].ife_ifnet == NULL)
+ if_idx--;
+ /* Second, clear virtual index entry. */
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index], NULL);
+ while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL)
+ V_if_index--;
IFNET_WUNLOCK();
if (refcount_release(&ifp->if_refcount))
@@ -836,7 +898,7 @@
struct sockaddr_dl *sdl;
struct ifaddr *ifa;
- MPASS(V_ifindex_table[ifp->if_index] == ifp);
+ MPASS(V_ifindex_table[ifp->if_index]->ife_ifnet == ifp);
#ifdef VIMAGE
ifp->if_vnet = curvnet;
@@ -1271,6 +1333,7 @@
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
struct if_clone *ifc;
+ struct ifindex_entry *ife;
#ifdef DEV_BPF
u_int bif_dlt, bif_hdrlen;
#endif
@@ -1295,14 +1358,17 @@
return (rc);
/*
- * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
+ * Unlink the ifnet from V_ifindex_table in current vnet, and shrink
* the if_index for that vnet if possible.
*
* NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
* or we'd lock on one vnet and unlock on another.
*/
IFNET_WLOCK();
- ifindex_free(ifp->if_index);
+ ife = V_ifindex_table[ifp->if_index];
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index], NULL);
+ while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL)
+ V_if_index--;
IFNET_WUNLOCK();
/*
@@ -1316,7 +1382,7 @@
* Switch to the context of the target vnet.
*/
CURVNET_SET_QUIET(new_vnet);
- ifindex_alloc(ifp);
+ ifindex_alloc(ifp, ife);
if_attach_internal(ifp, 1, ifc);
#ifdef DEV_BPF
@@ -4545,8 +4611,9 @@
IF_DB_PRINTF("%d", if_dunit);
IF_DB_PRINTF("%s", if_description);
IF_DB_PRINTF("%u", if_index);
+ IF_DB_PRINTF("%u", if_idx);
+ IF_DB_PRINTF("%u", if_idxgen);
IF_DB_PRINTF("%u", if_refcount);
- IF_DB_PRINTF("%d", if_index_reserved);
IF_DB_PRINTF("%p", if_softc);
IF_DB_PRINTF("%p", if_l2com);
IF_DB_PRINTF("%p", if_llsoftc);
@@ -4607,7 +4674,7 @@
db_printf("vnet=%p\n", curvnet);
#endif
for (idx = 1; idx <= V_if_index; idx++) {
- ifp = V_ifindex_table[idx];
+ ifp = V_ifindex_table[idx]->ife_ifnet;
if (ifp == NULL)
continue;
db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -301,16 +301,17 @@
LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */
/* protected by if_addr_lock */
- u_char if_alloctype; /* if_type at time of allocation */
- uint8_t if_numa_domain; /* NUMA domain of device */
+ uint8_t if_alloctype; /* if_type at time of allocation */
+ uint8_t if_numa_domain; /* NUMA domain of device */
+ uint16_t if_index; /* numeric abbreviation for this if */
+ uint16_t if_idx; /* immutable index ... */
+ uint16_t if_idxgen; /* ... and its generation count */
/* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
void *if_llsoftc; /* link layer softc */
void *if_l2com; /* pointer to protocol bits */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
- u_short if_index; /* numeric abbreviation for this if */
- short if_index_reserved; /* spare space to grow if_index */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
char *if_description; /* interface description */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Nov 7, 9:30 AM (22 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14508578
Default Alt Text
D33265.diff (10 KB)
Attached To
Mode
D33265: ifindex: provide global immutable complementary to per-vnet index
Attached
Detach File
Event Timeline
Log In to Comment