Page MenuHomeFreeBSD

D33265.diff
No OneTemporary

D33265.diff

Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -310,12 +310,42 @@
VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);
-/* Table of ifnet by index. */
+/*
+ * Tables of ifnet by index.
+ *
+ * We keep a global if_table indexing all interfaces across all vnets. The
+ * position in this table is immutable and doesn't change after if_vmove().
+ * This table can be used to serialize an ifnet pointer to an index and later
+ * restore it.
+ *
+ * Also, every vnet keeps its own V_ifindex_table. This one starts at 1 and
+ * is guaranteed to be populated without holes. Its maximum index always
+ * matches the overall number of interfaces in the vnet. It is used by all
+ * kinds of userland APIs like multicast setsockopt(2) or the
+ * net.link.generic.ifdata sysctl MIB. Entries in this array don't point at
+ * the interface directly, but at the global table entry.
+ *
+ * lo0 em0 epair0a lo0 epair0b
+ * / / / / /
+ * if_table [0 ] [1 ] [2 ] [3 ] [4 ]
+ * | | | | |
+ * vnet0_ifindex_table [0 NULL] [1 ] [2 ] [3 ] | |
+ * vnet1_ifindex_table [0 NULL] [1 ] [2 ]
+ */
+
+static int if_idx;
+static int if_lim = 8;
+static struct ifindex_entry {
+ struct ifnet *ife_ifnet;
+ uint16_t ife_gencnt;
+} *if_table;
+
+/* Per-VNET API index. */
VNET_DEFINE_STATIC(int, if_index);
#define V_if_index VNET(if_index)
-VNET_DEFINE_STATIC(int, if_indexlim) = 8;
+VNET_DEFINE_STATIC(int, if_indexlim) = 2;
#define V_if_indexlim VNET(if_indexlim)
-VNET_DEFINE_STATIC(struct ifnet **, ifindex_table);
+VNET_DEFINE_STATIC(struct ifindex_entry **, ifindex_table);
#define V_ifindex_table VNET(ifindex_table)
SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system,
@@ -352,13 +382,15 @@
struct ifnet *
ifnet_byindex(int idx)
{
+ struct ifindex_entry *ife;
NET_EPOCH_ASSERT();
if (__predict_false(idx <= 0 || idx > V_if_index))
return (NULL);
- return (ck_pr_load_ptr(&V_ifindex_table[idx]));
+ ife = ck_pr_load_ptr(&V_ifindex_table[idx]);
+ return (ife == NULL ? NULL : ck_pr_load_ptr(&ife->ife_ifnet));
}
struct ifnet *
@@ -375,30 +407,72 @@
}
/*
- * Allocate an ifindex array entry.
+ * Allocate global and virtual ifindex entries for ifnet.
+ * For a new interface ife = NULL. For interface being if_vmove'd, ife is
+ * non-NULL; the global entry is re-derived from ifp->if_idx under the lock.
*/
static void
-ifindex_alloc(struct ifnet *ifp)
+ifindex_alloc(struct ifnet *ifp, struct ifindex_entry *ife)
{
u_short idx;
IFNET_WLOCK();
- /*
- * Try to find an empty slot below V_if_index. If we fail, take the
- * next slot.
- */
+
+ if (ife != NULL)
+ goto virtual;
+
+ /* First allocate index in the global table. */
+ for (idx = 0; idx <= if_idx; idx++)
+ if (if_table[idx].ife_ifnet == NULL)
+ break;
+ if (idx >= if_lim) { /* Need to grow. */
+ VNET_ITERATOR_DECL(vnet_iter);
+ struct ifindex_entry *new, *old;
+ int newlim;
+
+ newlim = if_lim << 1;
+ new = malloc(newlim * sizeof(*new), M_IFNET,
+ M_WAITOK | M_ZERO);
+ memcpy(new, if_table, if_lim * sizeof(*new));
+ old = if_table;
+ ck_pr_store_ptr(&if_table, new);
+ if_lim = newlim;
+
+ /* Rebase every vnet entry onto the new table; holes stay NULL. */
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ for (u_short i = 1; i <= V_if_index; i++)
+ if (V_ifindex_table[i] != NULL)
+ ck_pr_store_ptr(&V_ifindex_table[i],
+ V_ifindex_table[i] - old + if_table);
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+
+ epoch_wait_preempt(net_epoch_preempt);
+ free(old, M_IFNET);
+ }
+ if (idx > if_idx)
+ if_idx = idx;
+ ifp->if_idx = idx;
+ ifp->if_idxgen = if_table[idx].ife_gencnt;
+ ck_pr_store_ptr(&if_table[idx].ife_ifnet, ifp);
+
+virtual:
+ ife = &if_table[ifp->if_idx]; /* if_table may have been reallocated. */
+ /* Second, allocate entry in V_ifindex_table. */
for (idx = 1; idx <= V_if_index; idx++) {
if (V_ifindex_table[idx] == NULL)
break;
}
-
- /* Catch if_index overflow. */
- if (idx >= V_if_indexlim) {
- struct ifnet **new, **old;
+ if (idx >= V_if_indexlim) { /* Need to grow. */
+ struct ifindex_entry **new, **old;
int newlim;
newlim = V_if_indexlim << 1;
- new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO);
+ new = malloc(newlim * sizeof(*new), M_IFNET,
+ M_WAITOK | M_ZERO);
memcpy(new, V_ifindex_table, V_if_indexlim * sizeof(*new));
old = V_ifindex_table;
ck_pr_store_ptr(&V_ifindex_table, new);
@@ -408,24 +482,11 @@
}
if (idx > V_if_index)
V_if_index = idx;
-
ifp->if_index = idx;
- ck_pr_store_ptr(&V_ifindex_table[idx], ifp);
+ ck_pr_store_ptr(&V_ifindex_table[idx], ife);
IFNET_WUNLOCK();
}
-static void
-ifindex_free(u_short idx)
-{
-
- IFNET_WLOCK_ASSERT();
-
- ck_pr_store_ptr(&V_ifindex_table[idx], NULL);
- while (V_if_index > 0 &&
- V_ifindex_table[V_if_index] == NULL)
- V_if_index--;
-}
-
struct ifaddr *
ifaddr_byindex(u_short idx)
{
@@ -447,6 +508,15 @@
* parameters.
*/
+static void
+if_init(const void *arg __unused)
+{
+
+ if_table = malloc(if_lim * sizeof(*if_table), M_IFNET,
+ M_WAITOK | M_ZERO);
+}
+SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL);
+
static void
vnet_if_init(const void *unused __unused)
{
@@ -606,7 +676,7 @@
ifp->if_get_counter = if_get_counter_default;
ifp->if_pcp = IFNET_PCP_NONE;
- ifindex_alloc(ifp);
+ ifindex_alloc(ifp, NULL);
return (ifp);
}
@@ -668,26 +738,18 @@
ifp->if_flags |= IFF_DYING; /* XXX: Locking */
- /*
- * XXXGL: An interface index is really an alias to ifp pointer.
- * Why would we clear the alias now, and not in the deferred
- * context? Indeed there is nothing wrong with some network
- * thread obtaining ifp via ifnet_byindex() inside the network
- * epoch and then dereferencing ifp while we peform if_free(),
- * and after if_free() finished, too.
- *
- * The reason is the VIMAGE. For some reason it was designed
- * to require all sockets drained before destroying, but not all
- * ifnets. A vnet destruction calls if_vmove() on ifnet, which
- * causes ID change. But ID change and a possible misidentification
- * of an ifnet later is a lesser problem, as it doesn't crash kernel.
- * A worse problem is that removed interface may outlive the vnet it
- * belongs too! The if_free_deferred() would see ifp->if_vnet freed.
- */
CURVNET_SET_QUIET(ifp->if_vnet);
IFNET_WLOCK();
- MPASS(V_ifindex_table[ifp->if_index] == ifp);
- ifindex_free(ifp->if_index);
+ MPASS(V_ifindex_table[ifp->if_index]->ife_ifnet == ifp);
+ /* First, clear global index entry. */
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index]->ife_ifnet, NULL);
+ V_ifindex_table[ifp->if_index]->ife_gencnt++;
+ while (if_idx > 0 && if_table[if_idx].ife_ifnet == NULL)
+ if_idx--;
+ /* Second, clear virtual index entry. */
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index], NULL);
+ while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL)
+ V_if_index--;
IFNET_WUNLOCK();
if (refcount_release(&ifp->if_refcount))
@@ -836,7 +898,7 @@
struct sockaddr_dl *sdl;
struct ifaddr *ifa;
- MPASS(V_ifindex_table[ifp->if_index] == ifp);
+ MPASS(V_ifindex_table[ifp->if_index]->ife_ifnet == ifp);
#ifdef VIMAGE
ifp->if_vnet = curvnet;
@@ -1271,6 +1333,7 @@
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
struct if_clone *ifc;
+ struct ifindex_entry *ife;
#ifdef DEV_BPF
u_int bif_dlt, bif_hdrlen;
#endif
@@ -1295,14 +1358,17 @@
return (rc);
/*
- * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
+ * Unlink the ifnet from V_ifindex_table in current vnet, and shrink
* the if_index for that vnet if possible.
*
* NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
* or we'd lock on one vnet and unlock on another.
*/
IFNET_WLOCK();
- ifindex_free(ifp->if_index);
+ ife = V_ifindex_table[ifp->if_index];
+ ck_pr_store_ptr(&V_ifindex_table[ifp->if_index], NULL);
+ while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL)
+ V_if_index--;
IFNET_WUNLOCK();
/*
@@ -1316,7 +1382,7 @@
* Switch to the context of the target vnet.
*/
CURVNET_SET_QUIET(new_vnet);
- ifindex_alloc(ifp);
+ ifindex_alloc(ifp, ife);
if_attach_internal(ifp, 1, ifc);
#ifdef DEV_BPF
@@ -4545,8 +4611,9 @@
IF_DB_PRINTF("%d", if_dunit);
IF_DB_PRINTF("%s", if_description);
IF_DB_PRINTF("%u", if_index);
+ IF_DB_PRINTF("%u", if_idx);
+ IF_DB_PRINTF("%u", if_idxgen);
IF_DB_PRINTF("%u", if_refcount);
- IF_DB_PRINTF("%d", if_index_reserved);
IF_DB_PRINTF("%p", if_softc);
IF_DB_PRINTF("%p", if_l2com);
IF_DB_PRINTF("%p", if_llsoftc);
@@ -4607,7 +4674,9 @@
db_printf("vnet=%p\n", curvnet);
#endif
for (idx = 1; idx <= V_if_index; idx++) {
- ifp = V_ifindex_table[idx];
+ if (V_ifindex_table[idx] == NULL)
+ continue; /* Hole: slot cleared by if_free()/if_vmove(). */
+ ifp = V_ifindex_table[idx]->ife_ifnet;
if (ifp == NULL)
continue;
db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp);
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -301,16 +301,17 @@
LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */
/* protected by if_addr_lock */
- u_char if_alloctype; /* if_type at time of allocation */
- uint8_t if_numa_domain; /* NUMA domain of device */
+ uint8_t if_alloctype; /* if_type at time of allocation */
+ uint8_t if_numa_domain; /* NUMA domain of device */
+ uint16_t if_index; /* numeric abbreviation for this if */
+ uint16_t if_idx; /* immutable index ... */
+ uint16_t if_idxgen; /* ... and its generation count */
/* Driver and protocol specific information that remains stable. */
void *if_softc; /* pointer to driver state */
void *if_llsoftc; /* link layer softc */
void *if_l2com; /* pointer to protocol bits */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
- u_short if_index; /* numeric abbreviation for this if */
- short if_index_reserved; /* spare space to grow if_index */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
char *if_description; /* interface description */

File Metadata

Mime Type
text/plain
Expires
Thu, Nov 7, 9:30 AM (22 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14508578
Default Alt Text
D33265.diff (10 KB)

Event Timeline