
D36043.diff

diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -91,7 +91,7 @@
#ifdef TCPHPTS
static bool do_bpf_strip_and_compress(struct inpcb *, struct lro_ctrl *,
struct lro_entry *, struct mbuf **, struct mbuf **, struct mbuf **,
- bool *, bool, bool, struct ifnet *);
+ bool *, bool, bool, struct ifnet *, bool);
#endif
@@ -119,6 +119,11 @@
CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_cpu_set_thresh, 0,
"Number of interrupts in a row on the same CPU that will make us declare an 'affinity' cpu?");
+static uint32_t tcp_less_accurate_lro_ts = 0;
+SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, lro_less_accurate,
+ CTLFLAG_MPSAFE, &tcp_less_accurate_lro_ts, 0,
+ "Do we trade off efficency by doing less timestamp operations for time accuracy?");
+
SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, fullqueue, CTLFLAG_RD,
&tcp_inp_lro_direct_queue, "Number of lro's fully queued to transport");
SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, wokeup, CTLFLAG_RD,
@@ -598,6 +603,29 @@
}
}
+static void
+tcp_lro_flush_active(struct lro_ctrl *lc)
+{
+ struct lro_entry *le;
+
+ /*
+ * Walk through the list of le entries and flush
+ * any that have packets. This is called because an
+ * inbound packet (e.g. a SYN) has arrived that must
+ * have all others flushed ahead of it. Note we have
+ * to do the remove because tcp_lro_flush() assumes
+ * that the entry is being freed. This is ok; it will
+ * just get reallocated again as if it were new.
+ */
+ LIST_FOREACH(le, &lc->lro_active, next) {
+ if (le->m_head != NULL) {
+ tcp_lro_active_remove(le);
+ tcp_lro_flush(lc, le);
+ }
+ }
+}
+
void
tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout)
{
@@ -664,9 +692,9 @@
log.u_bbr.flex2 = m->m_pkthdr.len;
else
log.u_bbr.flex2 = 0;
- log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
- log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
if (le->m_head) {
+ log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
+ log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
log.u_bbr.delRate = le->m_head->m_flags;
log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
@@ -1161,7 +1189,7 @@
static struct mbuf *
tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
- struct inpcb *inp, int32_t *new_m)
+ struct inpcb *inp, int32_t *new_m, bool can_append_old_cmp)
{
struct tcpcb *tp;
struct mbuf *m;
@@ -1171,19 +1199,22 @@
return (NULL);
/* Look at the last mbuf if any in queue */
- m = tp->t_tail_pkt;
- if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
- if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
- tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
- *new_m = 0;
- counter_u64_add(tcp_extra_mbuf, 1);
- return (m);
- } else {
- /* Mark we ran out of space */
- inp->inp_flags2 |= INP_MBUF_L_ACKS;
+ if (can_append_old_cmp) {
+ m = tp->t_tail_pkt;
+ if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
+ if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
+ tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
+ *new_m = 0;
+ counter_u64_add(tcp_extra_mbuf, 1);
+ return (m);
+ } else {
+ /* Mark we ran out of space */
+ inp->inp_flags2 |= INP_MBUF_L_ACKS;
+ }
}
}
/* Decide mbuf size. */
+ tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0);
if (inp->inp_flags2 & INP_MBUF_L_ACKS)
m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR);
else
@@ -1194,6 +1225,7 @@
return (NULL);
}
counter_u64_add(tcp_comp_total, 1);
+ m->m_pkthdr.rcvif = lc->ifp;
m->m_flags |= M_ACKCMP;
*new_m = 1;
return (m);
@@ -1290,7 +1322,7 @@
struct tcpcb *tp;
struct mbuf **pp, *cmp, *mv_to;
struct ifnet *lagg_ifp;
- bool bpf_req, lagg_bpf_req, should_wake;
+ bool bpf_req, lagg_bpf_req, should_wake, can_append_old_cmp;
/* Check if packet doesn't belongs to our network interface. */
if ((tcplro_stacks_wanting_mbufq == 0) ||
@@ -1361,18 +1393,33 @@
}
/* Strip and compress all the incoming packets. */
+ can_append_old_cmp = true;
cmp = NULL;
for (pp = &le->m_head; *pp != NULL; ) {
mv_to = NULL;
if (do_bpf_strip_and_compress(inp, lc, le, pp,
&cmp, &mv_to, &should_wake, bpf_req,
- lagg_bpf_req, lagg_ifp) == false) {
+ lagg_bpf_req, lagg_ifp, can_append_old_cmp) == false) {
/* Advance to next mbuf. */
pp = &(*pp)->m_nextpkt;
+ /*
+ * Once we have appended we can't look in the pending
+ * inbound packets for a compressed ack to append to.
+ */
+ can_append_old_cmp = false;
+ /*
+ * Once we append we also need to stop adding to any
+ * compressed ack we were remembering. A new cmp
+ * ack will be required.
+ */
+ cmp = NULL;
+ tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0);
} else if (mv_to != NULL) {
/* We are asked to move pp up */
pp = &mv_to->m_nextpkt;
- }
+ tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0);
+ } else
+ tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0);
}
/* Update "m_last_mbuf", if any. */
if (pp == &le->m_head)
@@ -1562,6 +1609,8 @@
/* add packet to LRO engine */
if (tcp_lro_rx_common(lc, mb, 0, false) != 0) {
+ /* Flush anything we have accumulated */
+ tcp_lro_flush_active(lc);
/* input packet to network layer */
(*lc->ifp->if_input)(lc->ifp, mb);
lc->lro_queued++;
@@ -1589,10 +1638,11 @@
* entry.
*/
ae->timestamp = m->m_pkthdr.rcv_tstmp;
+ ae->flags = 0;
if (m->m_flags & M_TSTMP_LRO)
- ae->flags = TSTMP_LRO;
+ ae->flags |= TSTMP_LRO;
else if (m->m_flags & M_TSTMP)
- ae->flags = TSTMP_HDWR;
+ ae->flags |= TSTMP_HDWR;
ae->seq = ntohl(th->th_seq);
ae->ack = ntohl(th->th_ack);
ae->flags |= tcp_get_flags(th);
@@ -1612,7 +1662,7 @@
static bool
do_bpf_strip_and_compress(struct inpcb *inp, struct lro_ctrl *lc,
struct lro_entry *le, struct mbuf **pp, struct mbuf **cmp, struct mbuf **mv_to,
- bool *should_wake, bool bpf_req, bool lagg_bpf_req, struct ifnet *lagg_ifp)
+ bool *should_wake, bool bpf_req, bool lagg_bpf_req, struct ifnet *lagg_ifp, bool can_append_old_cmp)
{
union {
void *ptr;
@@ -1718,7 +1768,7 @@
/* Now lets get space if we don't have some already */
if (*cmp == NULL) {
new_one:
- nm = tcp_lro_get_last_if_ackcmp(lc, le, inp, &n_mbuf);
+ nm = tcp_lro_get_last_if_ackcmp(lc, le, inp, &n_mbuf, can_append_old_cmp);
if (__predict_false(nm == NULL))
goto done;
*cmp = nm;
@@ -1954,6 +2004,14 @@
binuptime(&lc->lro_last_queue_time);
CURVNET_SET(lc->ifp->if_vnet);
error = tcp_lro_rx_common(lc, m, csum, true);
+ if (__predict_false(error != 0)) {
+ /*
+ * Flush anything we have accumulated
+ * ahead of this packet that can't
+ * be LRO'd. This preserves order.
+ */
+ tcp_lro_flush_active(lc);
+ }
CURVNET_RESTORE();
return (error);
@@ -1978,6 +2036,16 @@
return;
}
+ /* If there is no hardware or arrival stamp on the packet, add a timestamp */
+ if ((tcplro_stacks_wanting_mbufq > 0) &&
+ (tcp_less_accurate_lro_ts == 0) &&
+ ((mb->m_flags & M_TSTMP) == 0)) {
+ /* Add in an LRO time since no hardware */
+ binuptime(&lc->lro_last_queue_time);
+ mb->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time);
+ mb->m_flags |= M_TSTMP_LRO;
+ }
+
/* create sequence number */
lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
(((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
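
The knob added in the first hunk should be exported as net.inet.tcp.lro.lro_less_accurate (the name is taken from the SYSCTL_UINT() declaration above, assuming the usual net.inet.tcp.lro node). As a minimal userland sketch, not part of the change itself, it could be read and toggled with sysctlbyname(3); the program below is illustrative only.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t val, newval = 1;
	size_t len = sizeof(val);

	/* Read the current setting of the new LRO timestamp knob. */
	if (sysctlbyname("net.inet.tcp.lro.lro_less_accurate",
	    &val, &len, NULL, 0) == -1)
		err(1, "sysctlbyname(read)");
	printf("lro_less_accurate is currently %u\n", (unsigned)val);

	/* Enable it: trade timestamp accuracy for fewer binuptime() calls. */
	if (sysctlbyname("net.inet.tcp.lro.lro_less_accurate",
	    NULL, NULL, &newval, sizeof(newval)) == -1)
		err(1, "sysctlbyname(write)");
	return (0);
}

With the knob left at its default of 0, the final hunk stamps every queued mbuf that lacks a hardware timestamp via binuptime(); setting it to 1 skips that per-packet call, which is the efficiency/accuracy trade-off the sysctl description refers to.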
