Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F96215891
D21011.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D21011.diff
View Options
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -495,6 +495,13 @@
Allow incoming connections to request ECN.
Outgoing connections will not request ECN.
(default)
+.It 3
+Negotiate Accurate ECN, ECN, or no ECN on incoming connections.
+Outgoing connections will request Accurate ECN and fall back to
+ECN depending on the capabilities of the server.
+.It 4
+Negotiate Accurate ECN, ECN, or no ECN on incoming connections.
+Outgoing connections will not request ECN.
.El
.It Va ecn.maxretries
Number of retries (SYN or SYN/ACK retransmits) before disabling ECN on a
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -49,6 +49,7 @@
void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
int tcp_ecn_syncache_add(uint16_t, int);
uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+int tcp_ecn_get_ace(uint16_t);
#endif /* _KERNEL */
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -109,12 +109,91 @@
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
-	thflags &= (TH_CWR|TH_ECE);
-	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
-	    V_tcp_do_ecn) {
-		tp->t_flags2 |= TF2_ECN_PERMIT;
-		TCPSTAT_INC(tcps_ecn_shs);
+	/* Evaluate the ECN flags of a received SYN,ACK per V_tcp_do_ecn. */
+	if (V_tcp_do_ecn == 0)
+		return;
+	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
+		/* RFC3168 ECN handling */
+		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			TCPSTAT_INC(tcps_ecn_shs);
+		}
+	} else
+	/* Decode the AccECN handshake flags (AccECN draft, section 3.1.1). */
+	if ((V_tcp_do_ecn == 3) || (V_tcp_do_ecn == 4)) {
+		/*
+		 * The AE/CWR/ECE flags of the SYN,ACK
+		 * encode the IP-ECN state in which our
+		 * SYN was delivered to the peer.
+		 * Reactions to path ECN mangling can
+		 * be added here.
+		 */
+		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
+		/* RFC3168 SYN */
+		case (0|0|TH_ECE):
+			tp->t_flags2 |= TF2_ECN_PERMIT;
+			TCPSTAT_INC(tcps_ecn_shs);
+			break;
+		/* non-ECT SYN */
+		case (0|TH_CWR|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_nect);
+			break;
+		/* ECT0 SYN */
+		case (TH_AE|0|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_ect0);
+			break;
+		/* ECT1 SYN */
+		case (0|TH_CWR|TH_ECE):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 5;
+			TCPSTAT_INC(tcps_ecn_shs);
+			TCPSTAT_INC(tcps_ace_ect1);
+			break;
+		/* CE SYN */
+		case (TH_AE|TH_CWR|0):
+			tp->t_flags2 |= TF2_ACE_PERMIT;
+			tp->t_scep = 6;
+			/*
+			 * Reduce the IW to 2 MSS (to
+			 * account for delayed acks), as
+			 * the SYN was delivered CE marked.
+			 */
+			tp->snd_cwnd = 2 * tcp_maxseg(tp);
+			TCPSTAT_INC(tcps_ecn_shs);
+			/* A CE marked SYN counts as CE, not as non-ECT. */
+			TCPSTAT_INC(tcps_ace_ce);
+			break;
+		default:
+			break;
+		}
+		/*
+		 * Seed t_rcep from the IP-ECN field
+		 * of the received <SYN,ACK> so the
+		 * outgoing <ACK> reports that state
+		 * according to table 3 in the
+		 * AccECN draft
+		 */
+		switch (iptos & IPTOS_ECN_MASK) {
+		case (IPTOS_ECN_NOTECT):
+			tp->t_rcep = 0b010;
+			break;
+		case (IPTOS_ECN_ECT0):
+			tp->t_rcep = 0b100;
+			break;
+		case (IPTOS_ECN_ECT1):
+			tp->t_rcep = 0b011;
+			break;
+		case (IPTOS_ECN_CE):
+			tp->t_rcep = 0b110;
+			break;
+		}
	}
}
@@ -128,13 +207,53 @@
return;
if (V_tcp_do_ecn == 0)
return;
- if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
+ if ((V_tcp_do_ecn == 1) ||
+ (V_tcp_do_ecn == 2)) {
/* RFC3168 ECN handling */
if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
tp->t_flags2 |= TF2_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_shs);
}
+ } else
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ /* AccECN handling */
+ switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
+ default:
+ case (0|0|0):
+ break;
+ case (0|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ TCPSTAT_INC(tcps_ecn_shs);
+ break;
+ case (TH_AE|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ TCPSTAT_INC(tcps_ecn_shs);
+ /*
+ * Set the AccECN Codepoints on
+ * the outgoing <ACK> to the ECN
+ * state of the <SYN,ACK>
+ * according to table 3 in the
+ * AccECN draft
+ */
+ switch (iptos & IPTOS_ECN_MASK) {
+ case (IPTOS_ECN_NOTECT):
+ tp->t_rcep = 0b010;
+ break;
+ case (IPTOS_ECN_ECT0):
+ tp->t_rcep = 0b100;
+ break;
+ case (IPTOS_ECN_ECT1):
+ tp->t_rcep = 0b011;
+ break;
+ case (IPTOS_ECN_CE):
+ tp->t_rcep = 0b110;
+ break;
+ }
+ break;
+ }
}
}
@@ -146,7 +265,7 @@
{
int delta_ace = 0;
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
TCPSTAT_INC(tcps_ecn_ce);
@@ -159,15 +278,52 @@
break;
}
- /* RFC3168 ECN handling */
- if (thflags & TH_ECE)
- delta_ace = 1;
- if (thflags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_rcep += 1;
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
+ delta_ace = (tcp_ecn_get_ace(thflags) + 8 -
+ (tp->t_scep & 0x07)) & 0x07;
+ tp->t_scep += delta_ace;
+ } else {
+ /*
+ * process the final ACK of the 3WHS
+ * see table 3 in draft-ietf-tcpm-accurate-ecn
+ */
+ switch (tcp_ecn_get_ace(thflags)) {
+ case 0b010:
+ /* nonECT SYN or SYN,ACK */
+ /* Fallthrough */
+ case 0b011:
+ /* ECT1 SYN or SYN,ACK */
+ /* Fallthrough */
+ case 0b100:
+ /* ECT0 SYN or SYN,ACK */
+ tp->t_scep = 5;
+ break;
+ case 0b110:
+ /* CE SYN or SYN,ACK */
+ tp->t_scep = 6;
+ tp->snd_cwnd = 2 * tcp_maxseg(tp);
+ break;
+ default:
+ /* mangled AccECN handshake */
+ tp->t_scep = 5;
+ break;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ /* RFC3168 ECN handling */
+ if (thflags & TH_ECE)
+ delta_ace = 1;
+ if (thflags & TH_CWR) {
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ tp->t_flags |= TF_ACKNOW;
+ }
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
}
- if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
- tp->t_flags2 |= TF2_ECN_SND_ECE;
/* Process a packet differently from RFC3168. */
cc_ecnpkt_handler_flags(tp, thflags, iptos);
@@ -184,6 +340,8 @@
{
uint16_t thflags = 0;
+ if (V_tcp_do_ecn == 0)
+ return thflags;
if (V_tcp_do_ecn == 1) {
/* Send a RFC3168 ECN setup <SYN> packet */
if (tp->t_rxtshift >= 1) {
@@ -191,6 +349,14 @@
thflags = TH_ECE|TH_CWR;
} else
thflags = TH_ECE|TH_CWR;
+ } else
+ if (V_tcp_do_ecn == 3) {
+ /* Send an Accurate ECN setup <SYN> packet */
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ thflags = TH_ECE|TH_CWR|TH_AE;
+ } else
+ thflags = TH_ECE|TH_CWR|TH_AE;
}
return thflags;
@@ -215,6 +381,7 @@
newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!rxmit &&
!((tp->t_flags & TF_FORCEDATA) && len == 1));
+ /* RFC3168 ECN marking, only new data segments */
if (newdata) {
ipecn = IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
@@ -222,13 +389,35 @@
/*
* Reply with proper ECN notifications.
*/
- if (newdata &&
- (tp->t_flags2 & TF2_ECN_SND_CWR)) {
- *thflags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ *thflags &= ~(TH_AE|TH_CWR|TH_ECE);
+ if (tp->t_rcep & 0x01)
+ *thflags |= TH_ECE;
+ if (tp->t_rcep & 0x02)
+ *thflags |= TH_CWR;
+ if (tp->t_rcep & 0x04)
+ *thflags |= TH_AE;
+ if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
+ /*
+ * here we process the final
+ * ACK of the 3WHS
+ */
+ if (tp->t_rcep == 0b110) {
+ tp->t_rcep = 6;
+ } else {
+ tp->t_rcep = 5;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ if (newdata &&
+ (tp->t_flags2 & TF2_ECN_SND_CWR)) {
+ *thflags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ *thflags |= TH_ECE;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- *thflags |= TH_ECE;
return ipecn;
}
@@ -245,6 +434,20 @@
case SCF_ECN:
tp->t_flags2 |= TF2_ECN_PERMIT;
break;
+ case SCF_ACE_N:
+ /* Fallthrough */
+ case SCF_ACE_0:
+ /* Fallthrough */
+ case SCF_ACE_1:
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->t_scep = 5;
+ tp->t_rcep = 5;
+ break;
+ case SCF_ACE_CE:
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->t_scep = 6;
+ tp->t_rcep = 6;
+ break;
/* undefined SCF codepoint */
default:
break;
@@ -261,15 +464,54 @@
{
int scflags = 0;
- switch (thflags & (TH_CWR|TH_ECE)) {
+ switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
/* no ECN */
- case (0|0):
+ case (0|0|0):
break;
/* legacy ECN */
- case (TH_CWR|TH_ECE):
+ case (0|TH_CWR|TH_ECE):
scflags = SCF_ECN;
break;
+ /* Accurate ECN */
+ case (TH_AE|TH_CWR|TH_ECE):
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ scflags = SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ scflags = SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ scflags = SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ scflags = SCF_ACE_N;
+ break;
+ }
+ } else
+ scflags = SCF_ECN;
+ break;
+ /* Default Case (section 3.1.2) */
default:
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (iptos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ scflags = SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ scflags = SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ scflags = SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ scflags = SCF_ACE_N;
+ break;
+ }
+ }
break;
}
return scflags;
@@ -286,8 +528,28 @@
(sc->sc_flags & SCF_ECN_MASK)) {
switch (sc->sc_flags & SCF_ECN_MASK) {
case SCF_ECN:
- thflags |= (0 | TH_ECE);
+ thflags |= (0 | 0 | TH_ECE);
+ TCPSTAT_INC(tcps_ecn_shs);
+ break;
+ case SCF_ACE_N:
+ thflags |= (0 | TH_CWR | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ break;
+ case SCF_ACE_0:
+ thflags |= (TH_AE | 0 | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ break;
+ case SCF_ACE_1:
+ thflags |= (0 | TH_ECE | TH_CWR);
TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ break;
+ case SCF_ACE_CE:
+ thflags |= (TH_AE | TH_CWR | 0);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce);
break;
/* undefined SCF codepoint */
default:
@@ -296,3 +558,17 @@
}
return thflags;
}
+
+/* Pack the AE, CWR and ECE header flags into the 3-bit ACE value. */
+int
+tcp_ecn_get_ace(uint16_t thflags)
+{
+	int ace_bit0, ace_bit1, ace_bit2;
+
+	/* ECE is the least significant ACE bit, AE the most significant. */
+	ace_bit0 = (thflags & TH_ECE) ? 1 : 0;
+	ace_bit1 = (thflags & TH_CWR) ? 2 : 0;
+	ace_bit2 = (thflags & TH_AE) ? 4 : 0;
+
+	return (ace_bit2 | ace_bit1 | ace_bit0);
+}
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1209,7 +1209,7 @@
}
/* Also handle parallel SYN for ECN */
if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -15883,7 +15883,7 @@
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, true);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -16362,7 +16362,7 @@
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, false);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -18487,7 +18487,7 @@
}
/* Also handle parallel SYN for ECN */
if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
@@ -20489,7 +20489,7 @@
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
- if (tp->t_flags2 & TF2_ECN_PERMIT)
+ if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
ti->tcpi_options |= TCPI_OPT_ECN;
if (tp->t_flags & TF_FASTOPEN)
ti->tcpi_options |= TCPI_OPT_TFO;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -249,8 +249,8 @@
int t_dupacks; /* consecutive dup acks recd */
int t_lognum; /* Number of log entries */
int t_loglimit; /* Maximum number of log entries */
- uint32_t r_cep; /* Number of received CE marked packets */
- uint32_t s_cep; /* Synced number of delivered CE packets */
+ uint32_t t_rcep; /* Number of received CE marked packets */
+ uint32_t t_scep; /* Synced number of delivered CE packets */
int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Sep 25, 2:26 AM (21 h, 56 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12722425
Default Alt Text
D21011.diff (13 KB)
Attached To
Mode
D21011: Functional implementation of Accurate ECN in FreeBSD
Attached
Detach File
Event Timeline
Log In to Comment