Page MenuHomeFreeBSD

D34742.diff
No OneTemporary

D34742.diff

diff --git a/sys/net/iflib.h b/sys/net/iflib.h
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -131,7 +131,9 @@
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t __spare0__;
+ uint8_t ipi_ip_tos; /* IP ToS field data */
+ uint8_t __spare0__;
+ uint16_t __spare1__;
} *if_pkt_info_t;
typedef struct if_irq {
@@ -188,6 +190,7 @@
void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
int (*ift_legacy_intr) (void *);
qidx_t (*ift_txq_select) (void *, struct mbuf *);
+ qidx_t (*ift_txq_select_v2) (void *, struct mbuf *, if_pkt_info_t);
} *if_txrx_t;
typedef struct if_softc_ctx {
@@ -416,6 +419,13 @@
* as ift_txq_select in struct if_txrx
*/
#define IFLIB_FEATURE_QUEUE_SELECT 1400050
+/*
+ * Driver can set its own TX queue selection function
+ * as ift_txq_select_v2 in struct if_txrx. In addition to
+ * the mbuf, iflib passes this function an if_pkt_info_t
+ * holding the packet header information it has parsed.
+ */
+#define IFLIB_FEATURE_QUEUE_SELECT_V2 1400073
/*
* These enum values are used in iflib_needs_restart to indicate to iflib
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -210,6 +210,7 @@
#define isc_rxd_flush ifc_txrx.ift_rxd_flush
#define isc_legacy_intr ifc_txrx.ift_legacy_intr
#define isc_txq_select ifc_txrx.ift_txq_select
+#define isc_txq_select_v2 ifc_txrx.ift_txq_select_v2
eventhandler_tag ifc_vlan_attach_event;
eventhandler_tag ifc_vlan_detach_event;
struct ether_addr ifc_mac;
@@ -3195,32 +3196,24 @@
#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
#define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
+/**
+ * Parses out ethernet header information in the given mbuf.
+ * Returns in pi: ipi_etype (EtherType) and ipi_ehdrlen (Ethernet header length)
+ *
+ * This will account for the VLAN header if present.
+ *
+ * XXX: This doesn't handle QinQ, which could prevent TX offloads for those
+ * types of packets.
+ */
static int
-iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+iflib_parse_ether_header(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups)
{
- if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
struct ether_vlan_header *eh;
struct mbuf *m;
m = *mp;
- if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
- M_WRITABLE(m) == 0) {
- if ((m = m_dup(m, M_NOWAIT)) == NULL) {
- return (ENOMEM);
- } else {
- m_freem(*mp);
- DBG_COUNTER_INC(tx_frees);
- *mp = m;
- }
- }
-
- /*
- * Determine where frame payload starts.
- * Jump over vlan headers if already present,
- * helpful for QinQ too.
- */
if (__predict_false(m->m_len < sizeof(*eh))) {
- txq->ift_pullups++;
+ (*pullups)++;
if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
return (ENOMEM);
}
@@ -3232,6 +3225,143 @@
pi->ipi_etype = ntohs(eh->evl_encap_proto);
pi->ipi_ehdrlen = ETHER_HDR_LEN;
}
+ *mp = m;
+
+ return (0);
+}
+
+/**
+ * Parse up to the L3 header and extract IPv4/IPv6 header information into pi.
+ * Currently this information includes: IP ToS value, IP header version/presence
+ *
+ * This is missing some checks and doesn't edit the packet content as it goes,
+ * unlike iflib_parse_header(), in order to keep the amount of code here minimal.
+ *
+ * pi:      receives ipi_etype/ipi_ehdrlen and, for IPv4/IPv6, ipi_ip_hlen,
+ *          ipi_ipproto, ipi_ip_tos and the IPI_TX_IPV4/IPI_TX_IPV6 flag.
+ * mp:      in/out packet; may be replaced by m_dup()/m_pullup().
+ * pullups: reset to 0, then incremented once per m_pullup() attempted.
+ *
+ * Returns 0 on success. When m_dup() or m_pullup() fails in this function,
+ * ENOMEM is returned, the packet has been freed and *mp is set to NULL, so
+ * the caller must neither free nor touch it. An error from
+ * iflib_parse_ether_header() is passed through with *mp as that function
+ * left it.
+ */
+static int
+iflib_parse_header_partial(if_pkt_info_t pi, struct mbuf **mp, uint64_t *pullups)
+{
+	struct mbuf *m;
+	int err;
+
+	*pullups = 0;
+	m = *mp;
+	if (!M_WRITABLE(m)) {
+		/*
+		 * Continue with a private copy. The original is released
+		 * whether or not the copy succeeded, so a failed m_dup()
+		 * does not leave an unowned packet behind.
+		 */
+		m = m_dup(*mp, M_NOWAIT);
+		m_freem(*mp);
+		DBG_COUNTER_INC(tx_frees);
+		*mp = m;
+		if (m == NULL)
+			return (ENOMEM);
+	}
+
+	/* Fills out pi->ipi_etype */
+	err = iflib_parse_ether_header(pi, mp, pullups);
+	if (err)
+		return (err);
+	m = *mp;
+
+	switch (pi->ipi_etype) {
+#ifdef INET
+	case ETHERTYPE_IP:
+	{
+		struct mbuf *n;
+		struct ip *ip = NULL;
+		int miniplen;
+
+		miniplen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip));
+		if (__predict_false(m->m_len < miniplen)) {
+			/*
+			 * Check for common case where the first mbuf only
+			 * contains the Ethernet header: if the next mbuf holds
+			 * at least the minimal IP header, read it in place.
+			 */
+			if (m->m_len == pi->ipi_ehdrlen) {
+				n = m->m_next;
+				MPASS(n);
+				if (n->m_len >= sizeof(*ip))
+					ip = (struct ip *)n->m_data;
+			}
+			/* Otherwise make the header contiguous in the first mbuf */
+			if (ip == NULL) {
+				(*pullups)++;
+				m = m_pullup(m, miniplen);
+				if (__predict_false(m == NULL)) {
+					/* m_pullup() already freed the chain */
+					*mp = NULL;
+					return (ENOMEM);
+				}
+			}
+		}
+		if (ip == NULL)
+			ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+
+		/* Have the IPv4 header w/ no options here */
+		pi->ipi_ip_hlen = ip->ip_hl << 2;
+		pi->ipi_ipproto = ip->ip_p;
+		pi->ipi_ip_tos = ip->ip_tos;
+		pi->ipi_flags |= IPI_TX_IPV4;
+
+		break;
+	}
+#endif
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+	{
+		struct ip6_hdr *ip6;
+
+		if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
+			(*pullups)++;
+			m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr));
+			if (__predict_false(m == NULL)) {
+				/* m_pullup() already freed the chain */
+				*mp = NULL;
+				return (ENOMEM);
+			}
+		}
+		ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
+
+		/* Have the IPv6 fixed header here */
+		pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
+		pi->ipi_ipproto = ip6->ip6_nxt;
+		pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
+		pi->ipi_flags |= IPI_TX_IPV6;
+
+		break;
+	}
+#endif
+	default:
+		/* Not IPv4/IPv6: clear the offload request and report no IP header */
+		pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
+		pi->ipi_ip_hlen = 0;
+		break;
+	}
+	*mp = m;
+
+	return (0);
+}
+
+static int
+iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
+ struct mbuf *m;
+ int err;
+
+ m = *mp;
+ if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
+ M_WRITABLE(m) == 0) {
+ if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+ return (ENOMEM);
+ } else {
+ m_freem(*mp);
+ DBG_COUNTER_INC(tx_frees);
+ *mp = m;
+ }
+ }
+
+ /* Fills out pi->ipi_etype */
+ err = iflib_parse_ether_header(pi, mp, &txq->ift_pullups);
+ if (__predict_false(err))
+ return (err);
+ m = *mp;
switch (pi->ipi_etype) {
#ifdef INET
@@ -3276,6 +3406,7 @@
}
pi->ipi_ip_hlen = ip->ip_hl << 2;
pi->ipi_ipproto = ip->ip_p;
+ pi->ipi_ip_tos = ip->ip_tos;
pi->ipi_flags |= IPI_TX_IPV4;
/* TCP checksum offload may require TCP header length */
@@ -3329,6 +3460,7 @@
/* XXX-BZ this will go badly in case of ext hdrs. */
pi->ipi_ipproto = ip6->ip6_nxt;
+ pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
pi->ipi_flags |= IPI_TX_IPV6;
/* TCP checksum offload may require TCP header length */
@@ -4146,11 +4278,10 @@
static int
iflib_if_transmit(if_t ifp, struct mbuf *m)
{
- if_ctx_t ctx = if_getsoftc(ifp);
-
+ if_ctx_t ctx = if_getsoftc(ifp);
iflib_txq_t txq;
int err, qidx;
- int abdicate = ctx->ifc_sysctl_tx_abdicate;
+ int abdicate;
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
@@ -4162,7 +4293,24 @@
/* ALTQ-enabled interfaces always use queue 0. */
qidx = 0;
/* Use driver-supplied queue selection method if it exists */
- if (ctx->isc_txq_select)
+ if (ctx->isc_txq_select_v2) {
+ struct if_pkt_info pi;
+ uint64_t early_pullups = 0;
+ pkt_info_zero(&pi);
+
+ err = iflib_parse_header_partial(&pi, &m, &early_pullups);
+ if (__predict_false(err != 0)) {
+ /* Assign pullups for bad pkts to default queue */
+ ctx->ifc_txqs[0].ift_pullups += early_pullups;
+ DBG_COUNTER_INC(encap_txd_encap_fail);
+ return (err);
+ }
+ /* Let driver make queueing decision */
+ qidx = ctx->isc_txq_select_v2(ctx->ifc_softc, m, &pi);
+ ctx->ifc_txqs[qidx].ift_pullups += early_pullups;
+ }
+ /* Backwards compatibility w/ simpler queue select */
+ else if (ctx->isc_txq_select)
qidx = ctx->isc_txq_select(ctx->ifc_softc, m);
/* If not, use iflib's standard method */
else if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd))
@@ -4207,6 +4355,8 @@
}
#endif
DBG_COUNTER_INC(tx_seen);
+ abdicate = ctx->ifc_sysctl_tx_abdicate;
+
err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);
if (abdicate)
diff --git a/sys/sys/param.h b/sys/sys/param.h
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -76,7 +76,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1400072
+#define __FreeBSD_version 1400073
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 10, 11:42 AM (14 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15742261
Default Alt Text
D34742.diff (8 KB)

Event Timeline