Page MenuHomeFreeBSD

D21011.diff
No OneTemporary

D21011.diff

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -495,6 +495,13 @@
Allow incoming connections to request ECN.
Outgoing connections will not request ECN.
(default)
+.It 3
+Negotiate Accurate ECN, ECN, or no ECN on incoming connections.
+Outgoing connections will request Accurate ECN and fall back to
+ECN depending on the capabilities of the server.
+.It 4
+Negotiate Accurate ECN, ECN, or no ECN on incoming connections.
+Outgoing connections will not request ECN.
.El
.It Va ecn.maxretries
Number of retries (SYN or SYN/ACK retransmits) before disabling ECN on a
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -49,6 +49,7 @@
void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
int tcp_ecn_syncache_add(uint16_t, int);
uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+int tcp_ecn_get_ace(uint16_t);
#endif /* _KERNEL */
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -109,12 +109,91 @@
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
- thflags &= (TH_CWR|TH_ECE);
- if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
- V_tcp_do_ecn) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- TCPSTAT_INC(tcps_ecn_shs);
+	/*
+	 * Evaluate the ECN flags of the segment answering our SYN.
+	 * With V_tcp_do_ecn set to 1 or 2 only an RFC3168 reply is
+	 * accepted; with 3 or 4 the AE/CWR/ECE bits are decoded as an
+	 * Accurate ECN reply and t_scep / t_rcep are initialized.
+	 * Nothing is done when V_tcp_do_ecn is 0.
+	 */
+	if (V_tcp_do_ecn == 0)
+		return;
+	if ((V_tcp_do_ecn == 1) ||
+	    (V_tcp_do_ecn == 2)) {
+		/* RFC3168 ECN handling */
+		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			TCPSTAT_INC(tcps_ecn_shs);
+		}
+	} else if ((V_tcp_do_ecn == 3) ||
+	    (V_tcp_do_ecn == 4)) {
+		/* decoding Accurate ECN according to table in section 3.1.1 */
+		/*
+		 * on the SYN,ACK, process the AccECN
+		 * flags indicating the state the SYN
+		 * was delivered.
+		 * Reactions to Path ECN mangling can
+		 * come here.
+		 */
+		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
+		/* RFC3168 SYN */
+		case (0|0|TH_ECE):
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			TCPSTAT_INC(tcps_ecn_shs);
+			break;
+		/* non-ECT SYN */
+		case (0|TH_CWR|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_nect);
+			break;
+		/* ECT0 SYN */
+		case (TH_AE|0|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_ect0);
+			break;
+		/* ECT1 SYN */
+		case (0|TH_CWR|TH_ECE):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_ect1);
+			break;
+		/* CE SYN */
+		case (TH_AE|TH_CWR|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 6;
+			/*
+			 * the flags report that the SYN
+			 * was CE marked: reduce the IW to
+			 * 2 MSS (to account for delayed acks)
+			 */
+			tp->snd_cwnd = 2 * tcp_maxseg(tp);
+			TCPSTAT_INC(tcps_ecn_shs);
+			/* CE codepoint, so count it as CE and not as non-ECT */
+			TCPSTAT_INC(tcps_ace_ce);
+			break;
+		default:
+			break;
+		}
+		/*
+		 * Set the AccECN Codepoints on
+		 * the outgoing <ACK> to the ECN
+		 * state of the <SYN,ACK>
+		 * according to table 3 in the
+		 * AccECN draft
+		 */
+		switch (iptos & IPTOS_ECN_MASK) {
+		case (IPTOS_ECN_NOTECT):
+			tp->t_rcep = 0b010;
+			break;
+		case (IPTOS_ECN_ECT0):
+			tp->t_rcep = 0b100;
+			break;
+		case (IPTOS_ECN_ECT1):
+			tp->t_rcep = 0b011;
+			break;
+		case (IPTOS_ECN_CE):
+			tp->t_rcep = 0b110;
+			break;
+		}
+	}
}
@@ -128,13 +207,53 @@
return;
if (V_tcp_do_ecn == 0)
return;
- if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
+ if ((V_tcp_do_ecn == 1) ||
+ (V_tcp_do_ecn == 2)) {
/* RFC3168 ECN handling */
if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
tp->t_flags2 |= TF2_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_shs);
}
+ } else
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ /* AccECN handling */
+ switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
+ default:
+ case (0|0|0):
+ break;
+ case (0|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ break;
+ case (TH_AE|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ TCPSTAT_INC(tcps_ecn_shs);
+ /*
+ * Set the AccECN Codepoints on
+ * the outgoing <ACK> to the ECN
+ * state of the <SYN,ACK>
+ * according to table 3 in the
+ * AccECN draft
+ */
+ switch (iptos & IPTOS_ECN_MASK) {
+ case (IPTOS_ECN_NOTECT):
+ tp->t_rcep = 0b010;
+ break;
+ case (IPTOS_ECN_ECT0):
+ tp->t_rcep = 0b100;
+ break;
+ case (IPTOS_ECN_ECT1):
+ tp->t_rcep = 0b011;
+ break;
+ case (IPTOS_ECN_CE):
+ tp->t_rcep = 0b110;
+ break;
+ }
+ break;
+ }
}
}
@@ -146,7 +265,7 @@
{
int delta_ace = 0;
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
TCPSTAT_INC(tcps_ecn_ce);
@@ -159,15 +278,52 @@
break;
}
- /* RFC3168 ECN handling */
- if (thflags & TH_ECE)
- delta_ace = 1;
- if (thflags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_rcep += 1;
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ delta_ace = (tcp_ecn_get_ace(thflags) + 8 -
+ (tp->t_scep & 0x07)) & 0x07;
+ tp->t_scep += delta_ace;
+ } else {
+ /*
+ * process the final ACK of the 3WHS
+ * see table 3 in draft-ietf-tcpm-accurate-ecn
+ */
+ switch (tcp_ecn_get_ace(thflags)) {
+ case 0b010:
+ /* nonECT SYN or SYN,ACK */
+ /* Fallthrough */
+ case 0b011:
+ /* ECT1 SYN or SYN,ACK */
+ /* Fallthrough */
+ case 0b100:
+ /* ECT0 SYN or SYN,ACK */
+ tp->t_scep = 5;
+ break;
+ case 0b110:
+ /* CE SYN or SYN,ACK */
+ tp->t_scep = 6;
+ tp->snd_cwnd = 2 * tcp_maxseg(tp);
+ break;
+ default:
+ /* mangled AccECN handshake */
+ tp->t_scep = 5;
+ break;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ /* RFC3168 ECN handling */
+ if (thflags & TH_ECE)
+ delta_ace = 1;
+ if (thflags & TH_CWR) {
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ tp->t_flags |= TF_ACKNOW;
+ }
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
}
- if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
- tp->t_flags2 |= TF2_ECN_SND_ECE;
/* Process a packet differently from RFC3168. */
cc_ecnpkt_handler_flags(tp, thflags, iptos);
@@ -184,6 +340,8 @@
{
uint16_t thflags = 0;
+ if (V_tcp_do_ecn == 0)
+ return thflags;
if (V_tcp_do_ecn == 1) {
/* Send a RFC3168 ECN setup <SYN> packet */
if (tp->t_rxtshift >= 1) {
@@ -191,6 +349,14 @@
thflags = TH_ECE|TH_CWR;
} else
thflags = TH_ECE|TH_CWR;
+ } else
+ if (V_tcp_do_ecn == 3) {
+ /* Send an Accurate ECN setup <SYN> packet */
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ thflags = TH_ECE|TH_CWR|TH_AE;
+ } else
+ thflags = TH_ECE|TH_CWR|TH_AE;
}
return thflags;
@@ -215,6 +381,7 @@
newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!rxmit &&
!((tp->t_flags & TF_FORCEDATA) && len == 1));
+ /* RFC3168 ECN marking, only new data segments */
if (newdata) {
ipecn = IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
@@ -222,13 +389,35 @@
/*
* Reply with proper ECN notifications.
*/
- if (newdata &&
- (tp->t_flags2 & TF2_ECN_SND_CWR)) {
- *thflags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ *thflags &= ~(TH_AE|TH_CWR|TH_ECE);
+ if (tp->t_rcep & 0x01)
+ *thflags |= TH_ECE;
+ if (tp->t_rcep & 0x02)
+ *thflags |= TH_CWR;
+ if (tp->t_rcep & 0x04)
+ *thflags |= TH_AE;
+ if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
+ /*
+ * here we process the final
+ * ACK of the 3WHS
+ */
+ if (tp->t_rcep == 0b110) {
+ tp->t_rcep = 6;
+ } else {
+ tp->t_rcep = 5;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ if (newdata &&
+ (tp->t_flags2 & TF2_ECN_SND_CWR)) {
+ *thflags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ *thflags |= TH_ECE;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- *thflags |= TH_ECE;
return ipecn;
}
@@ -245,6 +434,20 @@
case SCF_ECN:
tp->t_flags2 |= TF2_ECN_PERMIT;
break;
+ case SCF_ACE_N:
+ /* Fallthrough */
+ case SCF_ACE_0:
+ /* Fallthrough */
+ case SCF_ACE_1:
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->t_scep = 5;
+ tp->t_rcep = 5;
+ break;
+ case SCF_ACE_CE:
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->t_scep = 6;
+ tp->t_rcep = 6;
+ break;
/* undefined SCF codepoint */
default:
break;
@@ -261,15 +464,54 @@
{
int scflags = 0;
- switch (thflags & (TH_CWR|TH_ECE)) {
+ switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
/* no ECN */
- case (0|0):
+ case (0|0|0):
break;
/* legacy ECN */
- case (TH_CWR|TH_ECE):
+ case (0|TH_CWR|TH_ECE):
scflags = SCF_ECN;
break;
+ /* Accurate ECN */
+ case (TH_AE|TH_CWR|TH_ECE):
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ scflags = SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ scflags = SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ scflags = SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ scflags = SCF_ACE_N;
+ break;
+ }
+ } else
+ scflags = SCF_ECN;
+ break;
+ /* Default Case (section 3.1.2) */
default:
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ scflags = SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ scflags = SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ scflags = SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ scflags = SCF_ACE_N;
+ break;
+ }
+ }
break;
}
return scflags;
@@ -286,8 +528,28 @@
(sc->sc_flags & SCF_ECN_MASK)) {
switch (sc->sc_flags & SCF_ECN_MASK) {
case SCF_ECN:
- thflags |= (0 | TH_ECE);
+ thflags |= (0 | 0 | TH_ECE);
+ TCPSTAT_INC(tcps_ecn_shs);
+ break;
+ case SCF_ACE_N:
+ thflags |= (0 | TH_CWR | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ break;
+ case SCF_ACE_0:
+ thflags |= (TH_AE | 0 | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ break;
+ case SCF_ACE_1:
+ thflags |= (0 | TH_ECE | TH_CWR);
TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ break;
+ case SCF_ACE_CE:
+ thflags |= (TH_AE | TH_CWR | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce);
break;
/* undefined SCF codepoint */
default:
@@ -296,3 +558,17 @@
}
return thflags;
}
+
+/*
+ * Build the 3-bit ACE value from thflags: TH_AE adds 4, TH_CWR 2, TH_ECE 1.
+ */
+int
+tcp_ecn_get_ace(uint16_t thflags)
+{
+	int ace;
+
+	ace = (thflags & TH_ECE) ? 1 : 0;
+	ace |= (thflags & TH_CWR) ? 2 : 0;
+	ace |= (thflags & TH_AE) ? 4 : 0;
+	return ace;
+}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1209,7 +1209,7 @@
}
/* Also handle parallel SYN for ECN */
if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -15883,7 +15883,7 @@
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, true);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -16362,7 +16362,7 @@
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, false);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -18487,7 +18487,7 @@
}
/* Also handle parallel SYN for ECN */
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -20489,7 +20489,7 @@
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
- if (tp->t_flags2 & TF2_ECN_PERMIT)
+ if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
ti->tcpi_options |= TCPI_OPT_ECN;
if (tp->t_flags & TF_FASTOPEN)
ti->tcpi_options |= TCPI_OPT_TFO;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -249,8 +249,8 @@
int t_dupacks; /* consecutive dup acks recd */
int t_lognum; /* Number of log entries */
int t_loglimit; /* Maximum number of log entries */
- uint32_t r_cep; /* Number of received CE marked packets */
- uint32_t s_cep; /* Synced number of delivered CE packets */
+ uint32_t t_rcep; /* Number of received CE marked packets */
+ uint32_t t_scep; /* Synced number of delivered CE packets */
int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;

File Metadata

Mime Type
text/plain
Expires
Wed, Sep 25, 2:26 AM (21 h, 56 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12722425
Default Alt Text
D21011.diff (13 KB)

Event Timeline