Page MenuHomeFreeBSD

D23230.id102159.diff
No OneTemporary

D23230.id102159.diff

Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd January 8, 2022
+.Dd January 31, 2022
.Dt TCP 4
.Os
.Sh NAME
@@ -711,6 +711,23 @@
specific connection.
This is needed to help with connection establishment
when a broken firewall is in the network path.
+.It Va ecn.generalized
+Enable sending all segments as ECN capable transport,
+including SYN, SYN/ACK, and retransmissions.
+This may only be enabled when ECN support itself is also active.
+Disabling ECN support will disable this feature automatically.
+Settings:
+.Bl -tag -compact
+.It 0
+Regular RFC3168 operation.
+Send only new data segments as ECN capable transport.
+(default)
+.It 1
+Support generalized ECN (ECN++), and send all segments of an ECN-enabled
+session as ECN capable transport.
+Control packets to non-established and non-listening ports are also
+marked identically, if outgoing sessions would request ECN.
+.El
.It Va pmtud_blackhole_detection
Enable automatic path MTU blackhole detection.
In case of retransmits of MSS sized segments,
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -210,8 +210,10 @@
"TCP ECN");
VNET_DEFINE(int, tcp_do_ecn) = 2;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_ecn), 0,
+static int sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT,
+ &VNET_NAME(tcp_do_ecn), 0, &sysctl_net_inet_tcp_ecn_enable_check, "IU",
"TCP ECN support");
VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
@@ -219,6 +221,13 @@
&VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+VNET_DEFINE(int, tcp_ecn_generalized) = 0;
+static int sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, generalized,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT,
+ &VNET_NAME(tcp_ecn_generalized), 0, &sysctl_net_inet_tcp_ecn_generalized_check, "IU",
+ "Send all packets as ECT");
+
VNET_DEFINE(int, tcp_insecure_syn) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_insecure_syn), 0,
@@ -1668,6 +1677,8 @@
((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
tp->t_flags2 |= TF2_ECN_PERMIT;
tp->t_flags2 |= TF2_ECN_SND_ECE;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
TCPSTAT_INC(tcps_ecn_shs);
}
if ((to.to_flags & TOF_SCALE) &&
@@ -2078,6 +2089,8 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
(V_tcp_do_ecn == 1)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
TCPSTAT_INC(tcps_ecn_shs);
}
@@ -4103,3 +4116,45 @@
return (4 * maxseg);
}
}
+/* Handler for net.inet.tcp.ecn.enable: accept 0..2, reject others (EINVAL). */
+static int
+sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS)
+{
+ uint32_t new; /* unsigned: only an upper-bound check is needed below */
+ int error;
+
+ new = V_tcp_do_ecn; /* seed with the current setting */
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr != NULL) { /* a new value was supplied */
+ if (new > 2)
+ error = EINVAL;
+ else {
+ V_tcp_do_ecn = new;
+ if (new == 0) /* ECN off also switches generalized ECN off */
+ V_tcp_ecn_generalized = new;
+ }
+ }
+
+ return (error);
+}
+/* Handler for net.inet.tcp.ecn.generalized: accept 0 or 1, else EINVAL. */
+static int
+sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS)
+{
+ uint32_t new; /* unsigned: only an upper-bound check is needed below */
+ int error;
+
+ new = V_tcp_ecn_generalized; /* seed with the current setting */
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr != NULL) { /* a new value was supplied */
+ if (new > 1)
+ error = EINVAL;
+ else /* new is 0 or 1 here; the if/else below pair with each other */
+ if (!V_tcp_do_ecn && new == 1) /* cannot enable while ECN is off */
+ error = EINVAL;
+ else
+ V_tcp_ecn_generalized = new;
+ }
+
+ return (error);
+}
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -1211,17 +1211,32 @@
tp->t_flags2 &= ~TF2_ECN_SND_ECE;
}
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ if ((TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT)) ||
+ /*
+ * Send ECN SYN segments as ECN-capable transport
+ * when ecn.generalized is set. This cannot be
+ * further simplified, as a fall-back to non-ECN
+ * may occur.
+ */
+ ((tp->t_flags2 & TF2_ECN_PLUSPLUS) &&
+ (((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN| TH_ECE|TH_CWR)) ||
+ ((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK| TH_CWR)) ||
+ ((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK|TH_ECE ))))) {
/*
- * If the peer has ECN, mark data packets with
- * ECN capable transmission (ECT).
- * Ignore pure ack packets, retransmissions and window probes.
+ * If the peer has ECN, mark new data packets
+ * with ECN capable transmission (ECT).
+ * Ignore pure ack packets, retransmissions and
+ * window probes unless doing generalized ECN.
*/
- if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- (sack_rxmit == 0) &&
+ if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) ||
+ (len > 0 && (sack_rxmit == 0) &&
+ SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!((tp->t_flags & TF_FORCEDATA) && len == 1 &&
- SEQ_LT(tp->snd_una, tp->snd_max))) {
+ SEQ_LT(tp->snd_una, tp->snd_max)))) {
#ifdef INET6
if (isipv6)
ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
@@ -1233,7 +1248,11 @@
* Reply with proper ECN notifications.
* Only set CWR on new data segments.
*/
- if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ if (tp->t_flags2 & TF2_ECN_SND_CWR &&
+ (len > 0 && (sack_rxmit == 0) &&
+ SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
+ SEQ_LT(tp->snd_una, tp->snd_max)))) {
flags |= TH_CWR;
tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -11414,6 +11414,8 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
(V_tcp_do_ecn == 1)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
KMOD_TCPSTAT_INC(tcps_ecn_shs);
}
if (SEQ_GT(th->th_ack, tp->snd_una)) {
@@ -14526,6 +14528,8 @@
((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
tp->t_flags2 |= TF2_ECN_PERMIT;
tp->t_flags2 |= TF2_ECN_SND_ECE;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
TCPSTAT_INC(tcps_ecn_shs);
}
if ((to.to_flags & TOF_SCALE) &&
@@ -18609,15 +18613,29 @@
flags |= TH_ECE;
tp->t_flags2 &= ~TF2_ECN_SND_ECE;
}
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ if ((TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT)) ||
+ /*
+ * Send ECN SYN segments as ECN-capable transport
+ * when ecn.generalized is set. This cannot be
+ * further simplified, as a fall-back to non-ECN
+ * may occur.
+ */
+ ((tp->t_flags2 & TF2_ECN_PLUSPLUS) &&
+ (((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN| TH_ECE|TH_CWR)) ||
+ ((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK| TH_CWR)) ||
+ ((flags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK|TH_ECE ))))) {
/*
* If the peer has ECN, mark data packets with ECN capable
* transmission (ECT). Ignore pure ack packets,
- * retransmissions.
+ * retransmissions unless doing generalized ECN.
*/
- if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- (sack_rxmit == 0)) {
+ if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) ||
+ ((len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ (sack_rxmit == 0)))) {
#ifdef INET6
if (isipv6)
ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
@@ -18629,7 +18647,9 @@
* Reply with proper ECN notifications.
* Only set CWR on new data segments.
*/
- if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ if ((tp->t_flags2 & TF2_ECN_SND_CWR) &&
+ (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+ (sack_rxmit == 0))) {
flags |= TH_CWR;
tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -2035,6 +2035,26 @@
}
#endif
+ /*
+ * Send out control packets with the same IP ECN header
+ * bits as if an established or listening socket
+ * existed.
+ */
+ if (V_tcp_ecn_generalized && ((V_tcp_do_ecn == 1) ||
+ ((tp != NULL) && (tp->t_flags2 & TF2_ECN_PERMIT)))) {
+#ifdef INET6
+ if (isipv6)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+#endif /* INET */
+ }
+
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
if (port) {
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -1027,8 +1027,11 @@
tp->t_flags |= TF_SACK_PERMIT;
}
- if (sc->sc_flags & SCF_ECN)
+ if (sc->sc_flags & SCF_ECN) {
tp->t_flags2 |= TF2_ECN_PERMIT;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
+ }
/*
* Set up MSS and get cached values from tcp_hostcache.
@@ -1943,6 +1946,21 @@
if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
th->th_flags |= TH_ECE;
TCPSTAT_INC(tcps_ecn_shs);
+
+ if ((V_tcp_ecn_generalized &&
+ (flags & TH_ACK))) {
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+#endif
+ TCPSTAT_INC(tcps_ecn_ect0);
+ }
}
/* Tack on the TCP options. */
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -3009,6 +3009,10 @@
db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags2 & TF2_ECN_PLUSPLUS) {
+ db_printf("%sTF2_ECN_PLUSPLUS", comma ? ", " : "");
+ comma = 1;
+ }
}
static void
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -568,6 +568,7 @@
#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
+#define TF2_ECN_PLUSPLUS 0x00000200 /* ECN++ session */
#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
/*
* Structure to hold TCP options that are only used during segment
@@ -997,6 +998,7 @@
VNET_DECLARE(int, tcp_do_sack);
VNET_DECLARE(int, tcp_do_tso);
VNET_DECLARE(int, tcp_ecn_maxretries);
+VNET_DECLARE(int, tcp_ecn_generalized);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
@@ -1043,6 +1045,7 @@
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_do_tso VNET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+#define V_tcp_ecn_generalized VNET(tcp_ecn_generalized)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)

File Metadata

Mime Type
text/plain
Expires
Mon, Jan 27, 6:03 PM (4 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16202773
Default Alt Text
D23230.id102159.diff (11 KB)

Event Timeline