Page MenuHomeFreeBSD

D38065.diff
No OneTemporary

D38065.diff

diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -387,15 +387,20 @@
* addr and len identify the netmap buffer, m is the (preallocated)
* mbuf to use for transmissions.
*
- * We should add a reference to the mbuf so the m_freem() at the end
- * of the transmission does not consume resources.
+ * Zero-copy transmission is possible if netmap is attached directly to a
+ * hardware interface: when cleaning we simply wait for the mbuf cluster
+ * refcount to decrement to 1, indicating that the driver has completed
+ * transmission and is done with the buffer. However, this approach can
+ * lead to queue deadlocks when attaching to software interfaces (e.g.,
+ * if_bridge) since we cannot rely on member ports to promptly reclaim
+ * transmitted mbufs. Since there is no easy way to distinguish these
+ * cases, we currently always copy the buffer.
*
- * On FreeBSD, and on multiqueue cards, we can force the queue using
+ * On multiqueue cards, we can force the queue using
* if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
* i = m->m_pkthdr.flowid % adapter->num_queues;
* else
* i = curcpu % adapter->num_queues;
- *
*/
int
nm_os_generic_xmit_frame(struct nm_os_gen_arg *a)
@@ -405,16 +410,21 @@
if_t ifp = a->ifp;
struct mbuf *m = a->m;
- /* Link the external storage to
- * the netmap buffer, so that no copy is necessary. */
- m->m_ext.ext_buf = m->m_data = a->addr;
- m->m_ext.ext_size = len;
+ M_ASSERTPKTHDR(m);
+ KASSERT((m->m_flags & M_EXT) != 0,
+ ("%s: mbuf %p has no cluster", __func__, m));
- m->m_flags |= M_PKTHDR;
- m->m_len = m->m_pkthdr.len = len;
+ if (MBUF_REFCNT(m) != 1) {
+ nm_prerr("invalid refcnt %d for %p", MBUF_REFCNT(m), m);
+ panic("in generic_xmit_frame");
+ }
+ if (unlikely(m->m_ext.ext_size < len)) {
+ nm_prlim(2, "size %d < len %d", m->m_ext.ext_size, len);
+ len = m->m_ext.ext_size;
+ }
- /* mbuf refcnt is not contended, no need to use atomic
- * (a memory barrier is enough). */
+ m_copyback(m, 0, len, a->addr);
+ m->m_len = m->m_pkthdr.len = len;
SET_MBUF_REFCNT(m, 2);
M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
m->m_pkthdr.flowid = a->ring_nr;
@@ -425,7 +435,6 @@
return ret ? -1 : 0;
}
-
struct netmap_adapter *
netmap_getna(if_t ifp)
{
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -272,6 +272,7 @@
}
for_each_tx_kring(r, kring, na) {
+ callout_drain(&kring->tx_event_callout);
mtx_destroy(&kring->tx_event_lock);
if (kring->tx_pool == NULL) {
continue;
@@ -357,6 +358,9 @@
}
mtx_init(&kring->tx_event_lock, "tx_event_lock",
NULL, MTX_SPIN);
+ callout_init_mtx(&kring->tx_event_callout,
+ &kring->tx_event_lock,
+ CALLOUT_RETURNUNLOCKED);
}
}
@@ -430,7 +434,7 @@
* the NIC notifies the driver that transmission is completed.
*/
static void
-generic_mbuf_destructor(struct mbuf *m)
+generic_mbuf_dtor(struct mbuf *m)
{
struct netmap_adapter *na = NA(GEN_TX_MBUF_IFP(m));
struct netmap_kring *kring;
@@ -473,7 +477,14 @@
if (++r == na->num_tx_rings) r = 0;
if (r == r_orig) {
+#ifndef __FreeBSD__
+ /*
+ * On FreeBSD this situation can arise if the tx_event
+ * callout handler cleared a stuck packet.
+ */
nm_prlim(1, "Cannot match event %p", m);
+#endif
+ nm_generic_mbuf_dtor(m);
return;
}
}
@@ -481,9 +492,7 @@
/* Second, wake up clients. They will reclaim the event through
* txsync. */
netmap_generic_irq(na, r, NULL);
-#ifdef __FreeBSD__
- void_mbuf_dtor(m);
-#endif
+ nm_generic_mbuf_dtor(m);
}
/* Record completed transmissions and update hwtail.
@@ -537,7 +546,6 @@
}
/* The event has been consumed, we can go
* ahead. */
-
} else if (MBUF_REFCNT(m) != 1) {
/* This mbuf is still busy: its refcnt is 2. */
break;
@@ -577,6 +585,18 @@
return e;
}
+#ifdef __FreeBSD__
+static void
+generic_tx_callout(void *arg)
+{
+ struct netmap_kring *kring = arg;
+
+ kring->tx_event = NULL;
+ mtx_unlock_spin(&kring->tx_event_lock);
+ netmap_generic_irq(kring->na, kring->ring_id, NULL);
+}
+#endif
+
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
@@ -620,8 +640,24 @@
return;
}
- SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+ SET_MBUF_DESTRUCTOR(m, generic_mbuf_dtor);
kring->tx_event = m;
+#ifdef __FreeBSD__
+ /*
+ * Handle the possibility that the transmitted buffer isn't reclaimed
+ * within a bounded period of time. This can arise when transmitting
+ * out of multiple ports via a lagg or bridge interface, since the
+ * member ports may legitimately only free transmitted buffers in
+ * batches.
+ *
+ * The callout handler clears the stuck packet from the ring, allowing
+ * transmission to proceed. In the common case we let
+ * generic_mbuf_dtor() unstick the ring, allowing mbufs to be
+ * reused most of the time.
+ */
+ callout_reset_sbt_curcpu(&kring->tx_event_callout, SBT_1MS, 0,
+ generic_tx_callout, kring, 0);
+#endif
mtx_unlock_spin(&kring->tx_event_lock);
kring->tx_pool[e] = NULL;
@@ -631,10 +667,8 @@
/* Decrement the refcount. This will free it if we lose the race
* with the driver. */
m_freem(m);
- smp_mb();
}
-
/*
* generic_netmap_txsync() transforms netmap buffers into mbufs
* and passes them to the standard device driver
@@ -712,6 +746,8 @@
break;
}
IFRATE(rate_ctx.new.txrepl++);
+ } else {
+ nm_os_mbuf_reinit(m);
}
a.m = m;
@@ -783,9 +819,6 @@
#endif
}
- /*
- * Second, reclaim completed buffers
- */
if (!gna->txqdisc && (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring))) {
/* No more available slots? Set a notification event
* on a netmap slot that will be cleaned in the future.
@@ -795,6 +828,9 @@
generic_set_tx_event(kring, nm_i);
}
+ /*
+ * Second, reclaim completed buffers
+ */
generic_netmap_tx_clean(kring, gna->txqdisc);
return 0;
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -501,6 +501,9 @@
struct mbuf **tx_pool;
struct mbuf *tx_event; /* TX event used as a notification */
NM_LOCK_T tx_event_lock; /* protects the tx_event mbuf */
+#ifdef __FreeBSD__
+ struct callout tx_event_callout;
+#endif
struct mbq rx_queue; /* intercepted rx mbufs. */
uint32_t users; /* existing bindings for this ring */
@@ -2381,39 +2384,59 @@
*
* We allocate mbufs with m_gethdr(), since the mbuf header is needed
* by the driver. We also attach a customly-provided external storage,
- * which in this case is a netmap buffer. When calling m_extadd(), however
- * we pass a NULL address, since the real address (and length) will be
- * filled in by nm_os_generic_xmit_frame() right before calling
- * if_transmit().
+ * which in this case is a netmap buffer.
*
* The dtor function does nothing, however we need it since mb_free_ext()
* has a KASSERT(), checking that the mbuf dtor function is not NULL.
*/
-static void void_mbuf_dtor(struct mbuf *m) { }
+static inline void
+nm_generic_mbuf_dtor(struct mbuf *m)
+{
+ uma_zfree(zone_clust, m->m_ext.ext_buf);
+}
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (fn != NULL) ? \
- (void *)fn : (void *)void_mbuf_dtor; \
+ (void *)fn : (void *)nm_generic_mbuf_dtor; \
} while (0)
static inline struct mbuf *
-nm_os_get_mbuf(if_t ifp, int len)
+nm_os_get_mbuf(if_t ifp __unused, int len)
{
struct mbuf *m;
+ void *buf;
- (void)ifp;
- (void)len;
+ KASSERT(len <= MCLBYTES, ("%s: len %d", __func__, len));
m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- return m;
+ if (__predict_false(m == NULL))
+ return (NULL);
+ buf = uma_zalloc(zone_clust, M_NOWAIT);
+ if (__predict_false(buf == NULL)) {
+ m_free(m);
+ return (NULL);
}
+ m_extadd(m, buf, MCLBYTES, nm_generic_mbuf_dtor, NULL, NULL, 0,
+ EXT_NET_DRV);
+ return (m);
+}
- m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
- NULL, NULL, 0, EXT_NET_DRV);
-
- return m;
+static inline void
+nm_os_mbuf_reinit(struct mbuf *m)
+{
+ void *buf;
+
+ KASSERT((m->m_flags & M_EXT) != 0,
+ ("%s: mbuf %p has no external storage", __func__, m));
+ KASSERT(m->m_ext.ext_size == MCLBYTES,
+ ("%s: mbuf %p has wrong external storage size %u", __func__, m,
+ m->m_ext.ext_size));
+
+ buf = m->m_ext.ext_buf;
+ m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
+ m_extadd(m, buf, MCLBYTES, nm_generic_mbuf_dtor, NULL, NULL, 0,
+ EXT_NET_DRV);
}
#endif /* __FreeBSD__ */

File Metadata

Mime Type
text/plain
Expires
Mon, Nov 18, 7:29 PM (21 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14703527
Default Alt Text
D38065.diff (8 KB)

Event Timeline