Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107059916
D34742.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
8 KB
Referenced Files
None
Subscribers
None
D34742.diff
View Options
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -131,7 +131,9 @@
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t __spare0__;
+ uint8_t ipi_ip_tos; /* IP ToS field data */
+ uint8_t __spare0__;
+ uint16_t __spare1__;
} *if_pkt_info_t;
typedef struct if_irq {
@@ -188,6 +190,7 @@
void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
int (*ift_legacy_intr) (void *);
qidx_t (*ift_txq_select) (void *, struct mbuf *);
+ qidx_t (*ift_txq_select_v2) (void *, struct mbuf *, if_pkt_info_t);
} *if_txrx_t;
typedef struct if_softc_ctx {
@@ -416,6 +419,13 @@
* as ift_txq_select in struct if_txrx
*/
#define IFLIB_FEATURE_QUEUE_SELECT 1400050
+/*
+ * Driver can set its own TX queue selection function
+ * as ift_txq_select_v2 in struct if_txrx. iflib parses
+ * the Ethernet and IPv4/IPv6 headers first and passes
+ * the resulting if_pkt_info_t to the function.
+ */
+#define IFLIB_FEATURE_QUEUE_SELECT_V2 1400073
/*
* These enum values are used in iflib_needs_restart to indicate to iflib
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -210,6 +210,7 @@
#define isc_rxd_flush ifc_txrx.ift_rxd_flush
#define isc_legacy_intr ifc_txrx.ift_legacy_intr
#define isc_txq_select ifc_txrx.ift_txq_select
+#define isc_txq_select_v2 ifc_txrx.ift_txq_select_v2
eventhandler_tag ifc_vlan_attach_event;
eventhandler_tag ifc_vlan_detach_event;
struct ether_addr ifc_mac;
@@ -3195,32 +3196,24 @@
#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
#define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
+/**
+ * Parses out ethernet header information in the given mbuf.
+ * Returns in pi: ipi_etype (EtherType) and ipi_ehdrlen (Ethernet header length)
+ *
+ * This will account for the VLAN header if present.
+ *
+ * XXX: This doesn't handle QinQ, which could prevent TX offloads for those
+ * types of packets.
+ */
static int
-iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+iflib_parse_ether_header(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups)
{
- if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
struct ether_vlan_header *eh;
struct mbuf *m;
m = *mp;
- if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
- M_WRITABLE(m) == 0) {
- if ((m = m_dup(m, M_NOWAIT)) == NULL) {
- return (ENOMEM);
- } else {
- m_freem(*mp);
- DBG_COUNTER_INC(tx_frees);
- *mp = m;
- }
- }
-
- /*
- * Determine where frame payload starts.
- * Jump over vlan headers if already present,
- * helpful for QinQ too.
- */
if (__predict_false(m->m_len < sizeof(*eh))) {
- txq->ift_pullups++;
+ (*pullups)++;
if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
return (ENOMEM);
}
@@ -3232,6 +3225,143 @@
pi->ipi_etype = ntohs(eh->evl_encap_proto);
pi->ipi_ehdrlen = ETHER_HDR_LEN;
}
+ *mp = m;
+
+ return (0);
+}
+
+/**
+ * Parse up to the L3 header and extract IPv4/IPv6 header information into pi:
+ * ipi_ip_hlen, ipi_ipproto, ipi_ip_tos and the IPI_TX_IPV4/IPI_TX_IPV6 flag.
+ * Zeroes *pullups, then counts each m_pullup(); returns 0 or ENOMEM.
+ * This is missing some checks and doesn't edit the packet content as it goes,
+ * unlike iflib_parse_header(), in order to keep the amount of code here minimal.
+ */
+static int
+iflib_parse_header_partial(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups)
+{
+ struct mbuf *m;
+ int err;
+
+ *pullups = 0; /* counter is restarted here, whatever the caller passed in */
+ m = *mp;
+ if (!M_WRITABLE(m)) { /* unlike iflib_parse_header(), not gated on IFLIB_NEED_SCRATCH */
+ if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+ return (ENOMEM); /* NOTE(review): *mp still holds the original mbuf on this path -- confirm who frees it */
+ } else {
+ m_freem(*mp); /* drop the original; continue with the copy */
+ DBG_COUNTER_INC(tx_frees);
+ *mp = m;
+ }
+ }
+
+ /* Fills out pi->ipi_etype and pi->ipi_ehdrlen; may swap *mp via m_pullup() */
+ err = iflib_parse_ether_header(pi, mp, pullups);
+ if (err)
+ return (err);
+ m = *mp;
+
+ switch (pi->ipi_etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct mbuf *n;
+ struct ip *ip = NULL;
+ int miniplen;
+
+ miniplen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip)); /* capped at the packet length */
+ if (__predict_false(m->m_len < miniplen)) {
+ /*
+ * Check for the common case where the first mbuf contains only
+ * the Ethernet header, so the IP header starts the next mbuf
+ */
+ if (m->m_len == pi->ipi_ehdrlen) {
+ n = m->m_next;
+ MPASS(n); /* more data must follow: m_len < miniplen <= m_pkthdr.len */
+ /* If the next mbuf holds at least the minimal IP header, read it in place */
+ if (n->m_len >= sizeof(*ip)) {
+ ip = (struct ip *)n->m_data;
+ } else {
+ (*pullups)++;
+ if (__predict_false((m = m_pullup(m, miniplen)) == NULL))
+ return (ENOMEM);
+ ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+ }
+ } else {
+ (*pullups)++;
+ if (__predict_false((m = m_pullup(m, miniplen)) == NULL))
+ return (ENOMEM);
+ ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+ }
+ } else {
+ ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+ }
+
+ /* ip points at the fixed IPv4 header; options are not necessarily pulled up */
+ pi->ipi_ip_hlen = ip->ip_hl << 2;
+ pi->ipi_ipproto = ip->ip_p;
+ pi->ipi_ip_tos = ip->ip_tos;
+ pi->ipi_flags |= IPI_TX_IPV4;
+
+ break;
+ }
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6;
+
+ if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
+ (*pullups)++;
+ if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL))
+ return (ENOMEM);
+ }
+ ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
+
+ /* Have the IPv6 fixed header here; extension headers are not walked */
+ pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
+ pi->ipi_ipproto = ip6->ip6_nxt;
+ pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
+ pi->ipi_flags |= IPI_TX_IPV6;
+
+ break;
+ }
+#endif
+ default: /* neither IPv4 nor IPv6, or support for it is not compiled in */
+ pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
+ pi->ipi_ip_hlen = 0;
+ break;
+ }
+ *mp = m; /* hand back the (possibly pulled-up) chain head */
+
+ return (0);
+
+}
+
+static int
+iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
+ struct mbuf *m;
+ int err;
+
+ m = *mp;
+ if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
+ M_WRITABLE(m) == 0) {
+ if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+ return (ENOMEM);
+ } else {
+ m_freem(*mp);
+ DBG_COUNTER_INC(tx_frees);
+ *mp = m;
+ }
+ }
+
+ /* Fills out pi->ipi_etype */
+ err = iflib_parse_ether_header(pi, mp, &txq->ift_pullups);
+ if (__predict_false(err))
+ return (err);
+ m = *mp;
switch (pi->ipi_etype) {
#ifdef INET
@@ -3276,6 +3406,7 @@
}
pi->ipi_ip_hlen = ip->ip_hl << 2;
pi->ipi_ipproto = ip->ip_p;
+ pi->ipi_ip_tos = ip->ip_tos;
pi->ipi_flags |= IPI_TX_IPV4;
/* TCP checksum offload may require TCP header length */
@@ -3329,6 +3460,7 @@
/* XXX-BZ this will go badly in case of ext hdrs. */
pi->ipi_ipproto = ip6->ip6_nxt;
+ pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
pi->ipi_flags |= IPI_TX_IPV6;
/* TCP checksum offload may require TCP header length */
@@ -4146,11 +4278,10 @@
static int
iflib_if_transmit(if_t ifp, struct mbuf *m)
{
- if_ctx_t ctx = if_getsoftc(ifp);
-
+ if_ctx_t ctx = if_getsoftc(ifp);
iflib_txq_t txq;
int err, qidx;
- int abdicate = ctx->ifc_sysctl_tx_abdicate;
+ int abdicate;
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
@@ -4162,7 +4293,24 @@
/* ALTQ-enabled interfaces always use queue 0. */
qidx = 0;
/* Use driver-supplied queue selection method if it exists */
- if (ctx->isc_txq_select)
+ if (ctx->isc_txq_select_v2) {
+ struct if_pkt_info pi;
+ uint64_t early_pullups = 0;
+ pkt_info_zero(&pi);
+
+ err = iflib_parse_header_partial(&pi, &m, &early_pullups);
+ if (__predict_false(err != 0)) {
+ /* Assign pullups for bad pkts to default queue */
+ ctx->ifc_txqs[0].ift_pullups += early_pullups;
+ DBG_COUNTER_INC(encap_txd_encap_fail);
+ return (err);
+ }
+ /* Let driver make queueing decision */
+ qidx = ctx->isc_txq_select_v2(ctx->ifc_softc, m, &pi);
+ ctx->ifc_txqs[qidx].ift_pullups += early_pullups;
+ }
+ /* Backwards compatibility w/ simpler queue select */
+ else if (ctx->isc_txq_select)
qidx = ctx->isc_txq_select(ctx->ifc_softc, m);
/* If not, use iflib's standard method */
else if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd))
@@ -4207,6 +4355,8 @@
}
#endif
DBG_COUNTER_INC(tx_seen);
+ abdicate = ctx->ifc_sysctl_tx_abdicate;
+
err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);
if (abdicate)
diff --git a/sys/sys/param.h b/sys/sys/param.h
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -76,7 +76,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1400072
+#define __FreeBSD_version 1400073
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jan 10, 11:42 AM (14 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15742261
Default Alt Text
D34742.diff (8 KB)
Attached To
Mode
D34742: iflib: Introduce v2 of TX Queue Select Functionality
Attached
Detach File
Event Timeline
Log In to Comment