Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F108637179
D36303.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
22 KB
Referenced Files
None
Subscribers
None
D36303.diff
View Options
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -31,7 +31,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd November 30, 2023
+.Dd January 17, 2023
.Dt TCP 4
.Os
.Sh NAME
@@ -504,6 +504,9 @@
specific connection.
This is needed to help with connection establishment
when a broken firewall is in the network path.
+.It Va ecn.option
+Reflect back the number of received bytes with a particular ECN marking
+by using the Accurate ECN TCP option on each outgoing packet.
.It Va fast_finwait2_recycle
Recycle
.Tn TCP
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -121,6 +121,10 @@
#define TCPOLEN_SIGNATURE 18
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOPT_ACCECN0 0xAC /* AccECN option kind whose first counter is the ECT0 byte count */
+#define TCPOPT_ACCECN1 0XAE /* AccECN option kind whose first counter is the ECT1 byte count; NOTE(review): hex prefix case differs from 0xAC, value is unaffected */
+#define TCPOLEN_ACCECN_EMPTY 2 /* kind byte + length byte, no counters */
+#define TCPOLEN_ACCECN_COUNTER 3 /* size in bytes of one 24-bit counter field */
#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
@@ -431,12 +435,12 @@
/* Accurate ECN counters. */
u_int32_t tcpi_delivered_ce;
u_int32_t tcpi_received_ce; /* # of CE marks received */
- u_int32_t __tcpi_delivered_e1_bytes;
- u_int32_t __tcpi_delivered_e0_bytes;
- u_int32_t __tcpi_delivered_ce_bytes;
- u_int32_t __tcpi_received_e1_bytes;
- u_int32_t __tcpi_received_e0_bytes;
- u_int32_t __tcpi_received_ce_bytes;
+ u_int32_t tcpi_delivered_e1_bytes;
+ u_int32_t tcpi_delivered_e0_bytes;
+ u_int32_t tcpi_delivered_ce_bytes;
+ u_int32_t tcpi_received_e1_bytes;
+ u_int32_t tcpi_received_e0_bytes;
+ u_int32_t tcpi_received_ce_bytes;
u_int32_t tcpi_total_tlp; /* tail loss probes sent */
u_int64_t tcpi_total_tlp_bytes; /* tail loss probe bytes sent */
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -49,6 +49,24 @@
int tcp_ecn_syncache_add(uint16_t, int);
uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+static inline void hton24(u_char **p, uint32_t v) /* write the low 24 bits of v at *p, most significant byte first */
+{
+ *(*p)++ = (u_char)(v >> 16); /* bits 23..16; bits 31..24 of v are discarded */
+ *(*p)++ = (u_char)(v >> 8); /* bits 15..8 */
+ *(*p)++ = (u_char)(v); /* bits 7..0; *p now points 3 bytes past where it started */
+}
+
+static inline uint32_t ntoh24(u_char *p) /* read 3 bytes at p as a 24-bit value, most significant byte first */
+{
+ uint32_t v; /* result; only the low 24 bits are ever set */
+
+ v = (uint32_t)(p[0] << 16); /* p[0] is promoted to int before the shift, so no bits are lost */
+ v |= (uint32_t)(p[1] << 8);
+ v |= (uint32_t)(p[2] << 0); /* shift by 0 is a no-op, written for symmetry */
+ return v; /* p itself is not advanced or modified */
+}
+
+
#endif /* _KERNEL */
#endif /* _NETINET_TCP_ECN_H_ */
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -113,6 +113,11 @@
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+VNET_DEFINE(int, tcp_ecn_option) = 0; /* AccECN TCP option use is off by default */
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, option,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_option), 0,
+ "Use AccECN TCP option"); /* read-write; when non-zero, AccECN negotiation in this file also sets TF_ACCECN_OPT */
+
/*
* Process incoming SYN,ACK packet
*/
@@ -156,7 +161,9 @@
case (0|TH_CWR|0):
tp->t_flags2 |= TF2_ACE_PERMIT;
tp->t_flags2 &= ~TF2_ECN_PERMIT;
- tp->t_scep = 5;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 5;
TCPSTAT_INC(tcps_ecn_shs);
TCPSTAT_INC(tcps_ace_nect);
break;
@@ -164,7 +171,9 @@
case (TH_AE|0|0):
tp->t_flags2 |= TF2_ACE_PERMIT;
tp->t_flags2 &= ~TF2_ECN_PERMIT;
- tp->t_scep = 5;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 5;
TCPSTAT_INC(tcps_ecn_shs);
TCPSTAT_INC(tcps_ace_ect0);
break;
@@ -172,7 +181,9 @@
case (0|TH_CWR|TH_ECE):
tp->t_flags2 |= TF2_ACE_PERMIT;
tp->t_flags2 &= ~TF2_ECN_PERMIT;
- tp->t_scep = 5;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 5;
TCPSTAT_INC(tcps_ecn_shs);
TCPSTAT_INC(tcps_ace_ect1);
break;
@@ -180,7 +191,9 @@
case (TH_AE|TH_CWR|0):
tp->t_flags2 |= TF2_ACE_PERMIT;
tp->t_flags2 &= ~TF2_ECN_PERMIT;
- tp->t_scep = 6;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 6;
/*
* reduce the IW to 2 MSS (to
* account for delayed acks) if
@@ -203,16 +216,16 @@
*/
switch (iptos & IPTOS_ECN_MASK) {
case (IPTOS_ECN_NOTECT):
- tp->t_rcep = 0b010;
+ tp->t_ae.rcep = 0b010;
break;
case (IPTOS_ECN_ECT0):
- tp->t_rcep = 0b100;
+ tp->t_ae.rcep = 0b100;
break;
case (IPTOS_ECN_ECT1):
- tp->t_rcep = 0b011;
+ tp->t_ae.rcep = 0b011;
break;
case (IPTOS_ECN_CE):
- tp->t_rcep = 0b110;
+ tp->t_ae.rcep = 0b110;
break;
}
break;
@@ -259,6 +272,8 @@
case (TH_AE|TH_CWR|TH_ECE):
tp->t_flags2 |= TF2_ACE_PERMIT;
tp->t_flags2 &= ~TF2_ECN_PERMIT;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
TCPSTAT_INC(tcps_ecn_shs);
/*
* Set the AccECN Codepoints on
@@ -269,16 +284,16 @@
*/
switch (iptos & IPTOS_ECN_MASK) {
case (IPTOS_ECN_NOTECT):
- tp->t_rcep = 0b010;
+ tp->t_ae.rcep = 0b010;
break;
case (IPTOS_ECN_ECT0):
- tp->t_rcep = 0b100;
+ tp->t_ae.rcep = 0b100;
break;
case (IPTOS_ECN_ECT1):
- tp->t_rcep = 0b011;
+ tp->t_ae.rcep = 0b011;
break;
case (IPTOS_ECN_CE):
- tp->t_rcep = 0b110;
+ tp->t_ae.rcep = 0b110;
break;
}
break;
@@ -306,18 +321,31 @@
TCPSTAT_INC(tcps_ecn_rcvect1);
break;
}
-
if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
if (tp->t_flags2 & TF2_ACE_PERMIT) {
- if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
- tp->t_rcep += 1;
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ACO_CE;
+ tp->t_ae.rceb += tlen;
+ tp->t_ae.rcep++;
+ break;
+ case IPTOS_ECN_ECT0:
+ tp->t_flags2 |= TF2_ACO_E0;
+ tp->t_ae.re0b += tlen;
+ break;
+ case IPTOS_ECN_ECT1:
+ tp->t_flags2 |= TF2_ACO_E1;
+ tp->t_ae.re1b += tlen;
+ break;
+ }
if (tp->t_flags2 & TF2_ECN_PERMIT) {
delta_cep = (tcp_ecn_get_ace(thflags) + 8 -
- (tp->t_scep & 7)) & 7;
+ (tp->t_ae.scep & 7)) & 7;
if (delta_cep < pkts)
delta_cep = pkts -
((pkts - delta_cep) & 7);
- tp->t_scep += delta_cep;
+ tp->t_ae.scep += delta_cep;
+ tp->t_ae.dcep = delta_cep;
} else {
/*
* process the final ACK of the 3WHS
@@ -332,16 +360,16 @@
/* FALLTHROUGH */
case 0b100:
/* ECT0 SYN or SYN,ACK */
- tp->t_scep = 5;
+ tp->t_ae.scep = 5;
break;
case 0b110:
/* CE SYN or SYN,ACK */
- tp->t_scep = 6;
+ tp->t_ae.scep = 6;
tp->snd_cwnd = 2 * tcp_maxseg(tp);
break;
default:
/* mangled AccECN handshake */
- tp->t_scep = 5;
+ tp->t_ae.scep = 5;
break;
}
tp->t_flags2 |= TF2_ECN_PERMIT;
@@ -350,7 +378,7 @@
/* RFC3168 ECN handling */
if ((thflags & (TH_SYN | TH_ECE)) == TH_ECE) {
delta_cep = 1;
- tp->t_scep++;
+ tp->t_ae.scep++;
}
if (thflags & TH_CWR) {
tp->t_flags2 &= ~TF2_ECN_SND_ECE;
@@ -429,16 +457,16 @@
* Reply with proper ECN notifications.
*/
if (tp->t_flags2 & TF2_ACE_PERMIT) {
- tcp_ecn_set_ace(thflags, tp->t_rcep);
+ tcp_ecn_set_ace(thflags, tp->t_ae.rcep);
if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
/*
* here we process the final
* ACK of the 3WHS
*/
- if (tp->t_rcep == 0b110) {
- tp->t_rcep = 6;
+ if (tp->t_ae.rcep == 0b110) {
+ tp->t_ae.rcep = 6;
} else {
- tp->t_rcep = 5;
+ tp->t_ae.rcep = 5;
}
tp->t_flags2 |= TF2_ECN_PERMIT;
}
@@ -451,7 +479,6 @@
if (tp->t_flags2 & TF2_ECN_SND_ECE)
*thflags |= TH_ECE;
}
-
return ipecn;
}
@@ -473,13 +500,17 @@
/* FALLTHROUGH */
case SCF_ACE_1:
tp->t_flags2 |= TF2_ACE_PERMIT;
- tp->t_scep = 5;
- tp->t_rcep = 5;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 5;
+ tp->t_ae.rcep = 5;
break;
case SCF_ACE_CE:
tp->t_flags2 |= TF2_ACE_PERMIT;
- tp->t_scep = 6;
- tp->t_rcep = 6;
+ if (V_tcp_ecn_option)
+ tp->t_flags |= TF_ACCECN_OPT;
+ tp->t_ae.scep = 6;
+ tp->t_ae.rcep = 6;
break;
}
}
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -997,6 +997,8 @@
}
tp = intotcpcb(inp);
+ to.to_ae = &tp->t_ae;
+
switch (tp->t_state) {
case TCPS_TIME_WAIT:
/*
@@ -1523,7 +1525,7 @@
int acked, ourfinisacked, needoutput = 0;
sackstatus_t sack_changed;
int rstreason, todrop, win, incforsyn = 0;
- uint32_t tiwin;
+ uint32_t tiwin, old_sceb;
uint16_t nsegs;
char *s;
struct inpcb *inp = tptoinpcb(tp);
@@ -1537,6 +1539,7 @@
thflags = tcp_get_flags(th);
tp->sackhint.last_sack_ack = 0;
sack_changed = SACK_NOCHANGE;
+ to.to_ae = &tp->t_ae;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
NET_EPOCH_ASSERT();
@@ -1608,9 +1611,15 @@
/*
* Parse options on any incoming segment.
*/
+ old_sceb = tp->t_ae.sceb;
tcp_dooptions(&to, (u_char *)(th + 1),
(th->th_off << 2) - sizeof(struct tcphdr),
(thflags & TH_SYN) ? TO_SYN : 0);
+ if ((to.to_flags & TOF_ACCE_CE) &&
+ (tp->t_ae.dcep != 0) &&
+ ((tp->t_ae.sceb - old_sceb) == 0))
+ tp->t_ae.scep -= tp->t_ae.dcep;
+
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if ((tp->t_flags & TF_SIGNATURE) != 0 &&
@@ -3463,7 +3472,7 @@
void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
- int opt, optlen;
+ int opt, optlen, tmp;
to->to_flags = 0;
for (; cnt > 0; cnt -= optlen, cp += optlen) {
@@ -3556,6 +3565,48 @@
to->to_tfo_len = optlen - 2;
to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
break;
+ case TCPOPT_ACCECN0:
+ case TCPOPT_ACCECN1:
+ to->to_flags |= TOF_ACCECNOPT;
+ if (optlen >= (TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER)) {
+ tmp = ntoh24(cp + TCPOLEN_ACCECN_EMPTY + 0);
+ if (opt == TCPOPT_ACCECN0) {
+ to->to_flags |= TOF_ACCE_E0;
+ tmp -= (to->to_ae->se0b & 0xFFFFFF);
+ if (tmp > 0)
+ to->to_ae->se0b += tmp;
+ } else {
+ to->to_flags |= TOF_ACCE_E1;
+ tmp -= (to->to_ae->se1b & 0xFFFFFF);
+ if (tmp > 0)
+ to->to_ae->se1b += tmp;
+ }
+ }
+ if (optlen >= (TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER)) {
+ to->to_flags |= TOF_ACCE_CE;
+ tmp = ntoh24(cp + TCPOLEN_ACCECN_EMPTY + 3);
+ tmp -= (to->to_ae->sceb & 0xFFFFFF);
+ if (tmp > 0)
+ to->to_ae->sceb += tmp;
+ }
+ if (optlen >= (TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER)) {
+ tmp = ntoh24(cp + TCPOLEN_ACCECN_EMPTY + 6);
+ if (opt == TCPOPT_ACCECN0) {
+ to->to_flags |= TOF_ACCE_E1;
+ tmp -= (to->to_ae->se1b & 0xFFFFFF);
+ if (tmp > 0)
+ to->to_ae->se1b += tmp;
+ } else {
+ to->to_flags |= TOF_ACCE_E0;
+ tmp -= (to->to_ae->se0b & 0xFFFFFF);
+ if (tmp > 0)
+ to->to_ae->se0b += tmp;
+ }
+ }
+ break;
default:
continue;
}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -589,10 +589,14 @@
* Note: this may not work when tcp headers change
* very dynamically in the future.
*/
- if ((((tp->t_flags & TF_SIGNATURE) ?
+ if ((min(TCP_MAXOLEN,
+ (((tp->t_flags & TF_SIGNATURE) ?
PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) +
((tp->t_flags & TF_RCVD_TSTMP) ?
PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) +
+ ((tp->t_flags & TF_ACCECN_OPT) ?
+ PADTCPOLEN(TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER) : 0))) +
len) >= tp->t_maxseg)
goto send;
/*
@@ -868,9 +872,32 @@
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif /* TCP_SIGNATURE */
-
+ /*
+ * AccECN option
+ * Don't send on <SYN>, only on <SYN,ACK> or
+ * when doing an AccECN session
+ */
+ if (tp->t_flags & TF_ACCECN_OPT) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_ae = &tp->t_ae;
+ to.to_flags |= ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) |
+ ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) |
+ ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0);
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_ACCE_SYN;
+ if (tp->t_flags & TF_ACKNOW)
+ to.to_flags |= TOF_ACCE_ACKNOW;
+ }
/* Processing the options. */
hdrlen += optlen = tcp_addoptions(&to, opt);
+ if (to.to_flags & TOF_ACCECNOPT) {
+ if ((to.to_flags & TOF_ACCE_E0) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E0;
+ if ((to.to_flags & TOF_ACCE_E1) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E1;
+ if ((to.to_flags & TOF_ACCE_CE) == 0)
+ tp->t_flags2 &= ~TF2_ACO_CE;
+ }
/*
* If we wanted a TFO option to be added, but it was unable
* to fit, ensure no data is sent.
@@ -1909,6 +1936,78 @@
optlen += total_len;
break;
}
+ case TOF_ACCECNOPT:
+ {
+ int tmp = 0;
+ int max_len = TCP_MAXOLEN - optlen;
+ if (max_len < TCPOLEN_ACCECN_EMPTY) {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_SYN) {
+ *optp++ = TCPOPT_ACCECN0;
+ optlen += TCPOLEN_ACCECN_EMPTY;
+ *optp++ = TCPOLEN_ACCECN_EMPTY;
+ continue;
+ } else {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ }
+ *optp++ = (to->to_flags & TOF_ACCE_E1) ?
+ TCPOPT_ACCECN1 : TCPOPT_ACCECN0;
+ if (max_len >= (TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER)) {
+ tmp = TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ } else
+ if (max_len >= (TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER)) {
+ tmp = TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ } else
+ if (max_len >= (TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER)) {
+ tmp = TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER;
+ }
+ *optp++ = tmp;
+ optlen += tmp;
+ if (to->to_flags & TOF_ACCE_E1) {
+ hton24(&optp, to->to_ae->re1b);
+ } else {
+ hton24(&optp, to->to_ae->re0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER)) {
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ hton24(&optp, to->to_ae->rceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER)) {
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ /*
+ * TCP option sufficient to hold full AccECN option
+ * but only send changed counters normally,
+ * full counters on ACKNOW
+ */
+ if (to->to_flags & TOF_ACCE_E1) {
+ hton24(&optp, to->to_ae->re0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ } else {
+ hton24(&optp, to->to_ae->re1b);
+ continue;
+ }
+ }
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1805,7 +1805,6 @@
#ifdef INVARIANTS
int thflags = tcp_get_flags(th);
#endif
-
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
NET_EPOCH_ASSERT();
@@ -2013,9 +2012,24 @@
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif
+ /* AccECN option */
+ if (tp->t_flags & TF_ACCECN_OPT) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_ae = &tp->t_ae;
+ to.to_flags |= ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) |
+ ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) |
+ ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0);
+ }
/* Add the options. */
tlen += optlen = tcp_addoptions(&to, optp);
-
+ if (to.to_flags & TOF_ACCECNOPT) {
+ if ((to.to_flags & TOF_ACCE_E0) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E0;
+ if ((to.to_flags & TOF_ACCE_E1) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E1;
+ if ((to.to_flags & TOF_ACCE_CE) == 0)
+ tp->t_flags2 &= ~TF2_ACO_CE;
+ }
/* Update m_len in the correct mbuf. */
optm->m_len += optlen;
} else
@@ -2330,6 +2344,14 @@
tcp_log_tcpcbinit(tp);
#endif
tp->t_pacing_rate = -1;
+ if (V_tcp_do_lrd)
+ tp->t_flags |= TF_LRD;
+ tp->t_ae.re0b = 1;
+ tp->t_ae.re1b = 1;
+ tp->t_ae.rceb = 0;
+ tp->t_ae.se0b = 1;
+ tp->t_ae.se1b = 1;
+ tp->t_ae.sceb = 0;
if (tp->t_fb->tfb_tcp_fb_init) {
if ((*tp->t_fb->tfb_tcp_fb_init)(tp, &tp->t_fb_ptr)) {
refcount_release(&tp->t_fb->tfb_refcnt);
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1810,6 +1810,7 @@
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
+ struct accecn ae;
NET_EPOCH_ASSERT();
@@ -1949,6 +1950,20 @@
/* don't send cookie again when retransmitting response */
sc->sc_tfo_cookie = NULL;
}
+ if (V_tcp_ecn_option)
+ to.to_flags |= TOF_ACCE_SYN;
+ }
+ if (V_tcp_ecn_option &&
+ (sc->sc_flags & SCF_ECN_MASK) &&
+ ((sc->sc_flags & SCF_ECN_MASK) != SCF_ECN)) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_flags |= TOF_ACCE_E0 |
+ TOF_ACCE_E1 |
+ TOF_ACCE_CE;
+ ae.re0b = 1;
+ ae.re1b = 1;
+ ae.rceb = 0;
+ to.to_ae = &ae;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -789,6 +789,15 @@
#endif
in_losing(inp);
}
+ /*
+ * Disable AccECN option when
+ * retransmitting after multiple
+ * timeouts.
+ */
+ if ((tp->t_rxtshift >= V_tcp_ecn_maxretries) &&
+ (tp->t_flags2 & TF2_ACE_PERMIT) &&
+ (tp->t_flags & TF_ACCECN_OPT))
+ tp->t_flags &= ~TF_ACCECN_OPT;
tp->snd_nxt = tp->snd_una;
tp->snd_recover = tp->snd_max;
/*
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1611,15 +1611,23 @@
* AccECN related counters.
*/
if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
- (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
+ (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
/*
* Internal counter starts at 5 for AccECN
* but 0 for RFC3168 ECN.
*/
- ti->tcpi_delivered_ce = tp->t_scep - 5;
- else
- ti->tcpi_delivered_ce = tp->t_scep;
- ti->tcpi_received_ce = tp->t_rcep;
+ ti->tcpi_delivered_ce = tp->t_ae.scep - 5;
+ ti->tcpi_received_ce = tp->t_ae.rcep - 5;
+ } else {
+ ti->tcpi_delivered_ce = tp->t_ae.scep;
+ ti->tcpi_received_ce = tp->t_ae.rcep;
+ }
+ ti->tcpi_received_e0_bytes = tp->t_ae.re0b - 1;
+ ti->tcpi_received_e1_bytes = tp->t_ae.re1b - 1;
+ ti->tcpi_received_ce_bytes = tp->t_ae.rceb;
+ ti->tcpi_delivered_e0_bytes = tp->t_ae.se0b - 1;
+ ti->tcpi_delivered_e1_bytes = tp->t_ae.se1b - 1;
+ ti->tcpi_delivered_ce_bytes = tp->t_ae.sceb;
}
/*
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -129,6 +129,18 @@
int32_t lost_bytes; /* number of rfc6675 IsLost() bytes */
};
+struct accecn { /* AccECN state: r* fields count what was received locally, s* fields track what the peer reported back */
+ uint32_t rcep; /* Number of received CE marked pkts; value placed in the ACE field of outgoing segments */
+ uint32_t scep; /* Synced number of delivered CE pkts, advanced by the ACE delta of incoming segments */
+ uint32_t dcep; /* Last ACE delta added to scep, kept so that it can be rolled back */
+ uint32_t re0b; /* Number of received ECT0 marked data bytes (initialised to 1) */
+ uint32_t re1b; /* Number of received ECT1 marked data bytes (initialised to 1) */
+ uint32_t rceb; /* Number of received CE marked data bytes (initialised to 0) */
+ uint32_t se0b; /* Synced number of delivered ECT0 bytes, updated from the peer's AccECN option */
+ uint32_t se1b; /* Synced number of delivered ECT1 bytes, updated from the peer's AccECN option */
+ uint32_t sceb; /* Synced number of delivered CE bytes, updated from the peer's AccECN option */
+};
+
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
@@ -431,8 +443,7 @@
int t_dupacks; /* consecutive dup acks recd */
int t_lognum; /* Number of log entries */
int t_loglimit; /* Maximum number of log entries */
- uint32_t t_rcep; /* Number of received CE marked pkts */
- uint32_t t_scep; /* Synced number of delivered CE pkts */
+ struct accecn t_ae; /* AccECN related packet and byte counters */
int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
@@ -788,7 +799,7 @@
#define TF_TSO 0x01000000 /* TSO enabled on this connection */
#define TF_TOE 0x02000000 /* this connection is offloaded */
#define TF_CLOSED 0x04000000 /* close(2) called on socket */
-#define TF_UNUSED1 0x08000000 /* unused */
+#define TF_ACCECN_OPT 0x08000000 /* AccECN is using TCP options */
#define TF_LRD 0x10000000 /* Lost Retransmission Detection */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
@@ -843,7 +854,9 @@
#define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */
#define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */
#define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */
-
+#define TF2_ACO_E0 0x00100000 /* received ECT0 byte counter changed, not yet sent in an AccECN option */
+#define TF2_ACO_E1 0x00200000 /* received ECT1 byte counter changed, not yet sent in an AccECN option */
+#define TF2_ACO_CE 0x00400000 /* received CE byte counter changed, not yet sent in an AccECN option */
/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
@@ -854,14 +867,21 @@
*/
struct tcpopt {
u_int32_t to_flags; /* which options are present */
-#define TOF_MSS 0x0001 /* maximum segment size */
-#define TOF_SCALE 0x0002 /* window scaling */
-#define TOF_SACKPERM 0x0004 /* SACK permitted */
-#define TOF_TS 0x0010 /* timestamp */
-#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
-#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
-#define TOF_MAXOPT 0x0200
+#define TOF_MSS 0x00000001 /* maximum segment size */
+#define TOF_SCALE 0x00000002 /* window scaling */
+#define TOF_SACKPERM 0x00000004 /* SACK permitted */
+#define TOF_TS 0x00000010 /* timestamp */
+#define TOF_SIGNATURE 0x00000040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x00000080 /* Peer sent SACK option */
+#define TOF_FASTOPEN 0x00000100 /* TCP Fast Open (TFO) cookie */
+#define TOF_ACCECNOPT 0x00000200 /* AccECN Option */
+#define TOF_MAXOPT 0x00000400
+ /* Keep internal flags above TOF_MAXOPT */
+#define TOF_ACCE_SYN 0x80000000 /* send empty option */
+#define TOF_ACCE_CE 0x40000000 /* CE counter changed */
+#define TOF_ACCE_E0 0x20000000 /* E0 counter changed */
+#define TOF_ACCE_E1 0x10000000 /* E1 counter changed */
+#define TOF_ACCE_ACKNOW 0x08000000 /* send full option */
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
@@ -871,7 +891,8 @@
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_int8_t to_tfo_len; /* TFO cookie length */
- u_int32_t to_spare; /* UTO */
+ struct accecn *to_ae; /* pointer to AccECN byte counters */
+ u_int32_t to_spare; /* UTO */
};
/*
@@ -1273,6 +1294,7 @@
VNET_DECLARE(int, tcp_do_sack);
VNET_DECLARE(int, tcp_do_tso);
VNET_DECLARE(int, tcp_ecn_maxretries);
+VNET_DECLARE(int, tcp_ecn_option);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
@@ -1319,6 +1341,7 @@
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_do_tso VNET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+#define V_tcp_ecn_option VNET(tcp_ecn_option)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jan 28, 12:48 AM (10 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16228946
Default Alt Text
D36303.diff (22 KB)
Attached To
Mode
D36303: Implement AccECN option (w/ early assigned option number)
Attached
Detach File
Event Timeline
Log In to Comment