Page MenuHomeFreeBSD

D34162.diff
No OneTemporary

D34162.diff

Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4364,6 +4364,7 @@
netinet/sctputil.c optional inet sctp | inet6 sctp
netinet/siftr.c optional inet siftr alq | inet6 siftr alq
netinet/tcp_debug.c optional tcpdebug
+netinet/tcp_ecn.c optional inet | inet6
netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413
netinet/tcp_hostcache.c optional inet | inet6
netinet/tcp_input.c optional inet | inet6
Index: sys/netinet/tcp_ecn.h
===================================================================
--- /dev/null
+++ sys/netinet/tcp_ecn.h
@@ -0,0 +1,55 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_ecn.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TCP_ECN_H_
+#define _NETINET_TCP_ECN_H_
+
+#ifdef _KERNEL
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+
+void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
+void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
+int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
+uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
+int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int);
+void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
+int tcp_ecn_syncache_add(uint16_t, int);
+uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+
+#endif /* _KERNEL */
+
+#endif /* _NETINET_TCP_ECN_H_ */
Index: sys/netinet/tcp_ecn.c
===================================================================
--- /dev/null
+++ sys/netinet/tcp_ecn.c
@@ -0,0 +1,297 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2007-2008,2010
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
+ * All rights reserved.
+ *
+ * Portions of this software were developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Lawrence Stewart,
+ * James Healy and David Hayes, made possible in part by a grant from the Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Robert N. M. Watson under
+ * contract to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
+ */
+
+/*
+ * Utility functions to deal with Explicit Congestion Notification in TCP
+ * implementing the essential parts of the Accurate ECN extension
+ * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <machine/cpu.h>
+
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/nd6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_timer.h>
+#include <netinet6/tcp6_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_ecn.h>
+
+
+/*
+ * Process an incoming <SYN,ACK>: when ECE is set, CWR is clear and
+ * tcp_do_ecn is non-zero, permit ECN and bump tcps_ecn_shs.
+ */
+void
+tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	uint16_t ecnbits = thflags & (TH_CWR | TH_ECE);
+
+	if (ecnbits != TH_ECE || !V_tcp_do_ecn)
+		return;
+	tp->t_flags2 |= TF2_ECN_PERMIT;
+	KMOD_TCPSTAT_INC(tcps_ecn_shs);
+}
+
+/*
+ * Handle a parallel SYN for ECN: a SYN without ACK that carries both
+ * CWR and ECE permits ECN and sets TF2_ECN_SND_ECE on the connection.
+ */
+void
+tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	const uint16_t ecn_setup = TH_CWR | TH_ECE;
+
+	if ((thflags & TH_ACK) != 0)
+		return;
+	/* RFC3168 ECN handling applies to tcp_do_ecn modes 1 and 2 only. */
+	if (V_tcp_do_ecn != 1 && V_tcp_do_ecn != 2)
+		return;
+	if ((thflags & ecn_setup) != ecn_setup)
+		return;
+	tp->t_flags2 |= TF2_ECN_PERMIT | TF2_ECN_SND_ECE;
+	KMOD_TCPSTAT_INC(tcps_ecn_shs);
+}
+
+/*
+ * Per-segment ECN input processing; a no-op unless ECN is permitted
+ * on the connection.  Returns 1 when ECN is permitted and the segment
+ * has TH_ECE set (callers then signal congestion), otherwise 0.
+ */
+int
+tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
+{
+	int ecn_codepoint = iptos & IPTOS_ECN_MASK;
+
+	if ((tp->t_flags2 & TF2_ECN_PERMIT) == 0)
+		return (0);
+
+	/* RFC3168: a received CWR stops our ECE and forces an ACK. */
+	if (thflags & TH_CWR) {
+		tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+		tp->t_flags |= TF_ACKNOW;
+	}
+	/* Count the IP codepoint; CE sets SND_ECE even after a CWR. */
+	switch (ecn_codepoint) {
+	case IPTOS_ECN_CE:
+		tp->t_flags2 |= TF2_ECN_SND_ECE;
+		KMOD_TCPSTAT_INC(tcps_ecn_ce);
+		break;
+	case IPTOS_ECN_ECT0:
+		KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+		break;
+	case IPTOS_ECN_ECT1:
+		KMOD_TCPSTAT_INC(tcps_ecn_ect1);
+		break;
+	}
+
+	/* Process a packet differently from RFC3168. */
+	cc_ecnpkt_handler_flags(tp, thflags, iptos);
+
+	return ((thflags & TH_ECE) != 0);
+}
+
+/*
+ * Return the TCP header flags for an ECN-setup <SYN>: ECE|CWR when
+ * tcp_do_ecn is 1 and, once t_rxtshift is at least 1, only while it
+ * does not exceed tcp_ecn_maxretries; otherwise 0.
+ */
+uint16_t
+tcp_ecn_output_syn_sent(struct tcpcb *tp)
+{
+	const uint16_t ecn_setup = TH_ECE | TH_CWR;
+
+	/* Only mode 1 sends a RFC3168 ECN setup <SYN> packet. */
+	if (V_tcp_do_ecn != 1)
+		return (0);
+
+	/* Past the retry limit the SYN goes out without ECN bits. */
+	if (tp->t_rxtshift >= 1 && tp->t_rxtshift > V_tcp_ecn_maxretries)
+		return (0);
+	return (ecn_setup);
+}
+
+/*
+ * ECN output processing: OR the pending CWR/ECE bits into *thflags
+ * and return the IP ECN codepoint for the outgoing segment.
+ */
+int
+tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
+{
+	int codepoint = IPTOS_ECN_NOTECT;
+	bool forced_probe, new_data;
+
+	/*
+	 * Only new data is marked ECN capable (ECT0): pure control
+	 * packets, retransmissions and one-byte forced window probes
+	 * stay Not-ECT.
+	 * NOTE(review): the inline code this helper replaces also
+	 * checked sack_rxmit == 0 -- confirm nothing is lost here.
+	 */
+	forced_probe = (tp->t_flags & TF_FORCEDATA) && len == 1;
+	new_data = len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
+	    !forced_probe;
+	if (new_data) {
+		codepoint = IPTOS_ECN_ECT0;
+		KMOD_TCPSTAT_INC(tcps_ecn_ect0);
+		/* CWR is only set, and its flag cleared, on new data. */
+		if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+			*thflags |= TH_CWR;
+			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+		}
+	}
+	/* ECE goes on every segment while TF2_ECN_SND_ECE is set. */
+	if (tp->t_flags2 & TF2_ECN_SND_ECE)
+		*thflags |= TH_ECE;
+
+	return (codepoint);
+}
+
+/*
+ * Set up the ECN related tcpcb fields from a syncache entry:
+ * an entry flagged SCF_ECN makes the connection ECN-permitted.
+ */
+void
+tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
+{
+	int ecn_mode = sc->sc_flags & SCF_ECN;
+
+	/* No ECN recorded for this entry: leave the tcpcb untouched. */
+	if (ecn_mode == 0)
+		return;
+	/*
+	 * Any other value is an undefined SCF codepoint and is ignored.
+	 */
+	if (ecn_mode == SCF_ECN)
+		tp->t_flags2 |= TF2_ECN_PERMIT;
+}
+
+/*
+ * Translate the ECN bits of a received <SYN> into the flags the
+ * syncache should record; iptos is not used here.
+ *
+ * Returns SCF_ECN when both CWR and ECE are set (legacy ECN setup)
+ * and 0 for every other combination.
+ */
+int
+tcp_ecn_syncache_add(uint16_t thflags, int iptos)
+{
+	const uint16_t ecn_setup = TH_CWR | TH_ECE;
+	int scflags = 0;
+
+	/* legacy ECN */
+	if ((thflags & ecn_setup) == ecn_setup)
+		scflags = SCF_ECN;
+
+	/*
+	 * No ECN bits, or CWR/ECE alone: nothing is recorded.
+	 */
+	return (scflags);
+}
+
+/*
+ * Set up the ECN bits of a <SYN,ACK> from a syncache entry.
+ * Returns thflags with TH_ECE added when SYN is set and the entry
+ * is flagged SCF_ECN; otherwise thflags is returned unchanged.
+ */
+uint16_t
+tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
+{
+	int ecn_mode;
+
+	/* Segments without SYN never get the ECN setup reply. */
+	if ((thflags & TH_SYN) == 0)
+		return (thflags);
+	/* Zero means no ECN; other values are undefined codepoints. */
+	ecn_mode = sc->sc_flags & SCF_ECN;
+	if (ecn_mode == 0 || ecn_mode != SCF_ECN)
+		return (thflags);
+	thflags |= TH_ECE;
+	KMOD_TCPSTAT_INC(tcps_ecn_shs);
+	return (thflags);
+}
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -123,6 +123,7 @@
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
+#include <netinet/tcp_ecn.h>
#include <netinet/udp.h>
#include <netipsec/ipsec_support.h>
@@ -1517,7 +1518,8 @@
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
{
- int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
+ uint16_t thflags;
+ int acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win, incforsyn = 0;
uint32_t tiwin;
uint16_t nsegs;
@@ -1597,32 +1599,8 @@
/*
* TCP ECN processing.
*/
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
- if (thflags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
- }
- switch (iptos & IPTOS_ECN_MASK) {
- case IPTOS_ECN_CE:
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- TCPSTAT_INC(tcps_ecn_ce);
- break;
- case IPTOS_ECN_ECT0:
- TCPSTAT_INC(tcps_ecn_ect0);
- break;
- case IPTOS_ECN_ECT1:
- TCPSTAT_INC(tcps_ecn_ect1);
- break;
- }
-
- /* Process a packet differently from RFC3168. */
- cc_ecnpkt_handler(tp, th, iptos);
-
- /* Congestion experienced. */
- if (thflags & TH_ECE) {
- cc_cong_signal(tp, th, CC_ECN);
- }
- }
+ if (tcp_ecn_input_segment(tp, thflags, iptos))
+ cc_cong_signal(tp, th, CC_ECN);
/*
* Parse options on any incoming segment.
@@ -1663,13 +1641,7 @@
*/
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
/* Handle parallel SYN for ECN */
- if (!(thflags & TH_ACK) &&
- ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
- ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- TCPSTAT_INC(tcps_ecn_shs);
- }
+ tcp_ecn_input_parallel_syn(tp, thflags, iptos);
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE) &&
!(tp->t_flags & TF_NOOPT)) {
@@ -2075,11 +2047,7 @@
else
tp->t_flags |= TF_ACKNOW;
- if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
- (V_tcp_do_ecn == 1)) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- TCPSTAT_INC(tcps_ecn_shs);
- }
+ tcp_ecn_input_syn_sent(tp, thflags, iptos);
/*
* Received <SYN,ACK> in SYN_SENT[*] state.
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -84,8 +84,9 @@
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_seq.h>
-#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_timer.h>
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
#include <netinet/tcp_fastopen.h>
@@ -98,6 +99,7 @@
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
+#include <netinet/tcp_ecn.h>
#include <netipsec/ipsec_support.h>
@@ -199,7 +201,8 @@
struct socket *so = tp->t_inpcb->inp_socket;
int32_t len;
uint32_t recwin, sendwin;
- int off, flags, error = 0; /* Keep compiler happy */
+ uint16_t flags;
+ int off, error = 0; /* Keep compiler happy */
u_int if_hw_tsomaxsegcount = 0;
u_int if_hw_tsomaxsegsize = 0;
struct mbuf *m;
@@ -1197,54 +1200,27 @@
* resend those bits a number of times as per
* RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- flags |= TH_ECE|TH_CWR;
- } else
- flags |= TH_ECE|TH_CWR;
- }
- /* Handle parallel SYN for ECN */
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE)) {
- flags |= TH_ECE;
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
}
-
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ /* Also handle parallel SYN for ECN */
+ if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
- /*
- * If the peer has ECN, mark data packets with
- * ECN capable transmission (ECT).
- * Ignore pure ack packets, retransmissions and window probes.
- */
- if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- (sack_rxmit == 0) &&
- !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
- SEQ_LT(tp->snd_una, tp->snd_max))) {
+ int ect = tcp_ecn_output_established(tp, &flags, len);
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE))
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
#ifdef INET6
- if (isipv6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
- }
- else
+ if (isipv6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(ect << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= IPTOS_ECN_ECT0;
- }
- TCPSTAT_INC(tcps_ecn_ect0);
- /*
- * Reply with proper ECN notifications.
- * Only set CWR on new data segments.
- */
- if (tp->t_flags2 & TF2_ECN_SND_CWR) {
- flags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
- }
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= ect;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- flags |= TH_ECE;
}
/*
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -93,6 +93,7 @@
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_ratelimit.h>
#include <netinet/tcp_accounting.h>
@@ -113,6 +114,7 @@
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#include <netinet/tcp_ecn.h>
#include <netipsec/ipsec_support.h>
@@ -11406,11 +11408,9 @@
tp->t_flags |= TF_ACKNOW;
rack->rc_dack_toggle = 0;
}
- if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
- (V_tcp_do_ecn == 1)) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- KMOD_TCPSTAT_INC(tcps_ecn_shs);
- }
+
+ tcp_ecn_input_syn_sent(tp, thflags, iptos);
+
if (SEQ_GT(th->th_ack, tp->snd_una)) {
/*
* We advance snd_una for the
@@ -13683,31 +13683,8 @@
}
tp->t_rcvtime = ticks;
/* Now what about ECN? */
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
- if (ae->flags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
- }
- switch (ae->codepoint & IPTOS_ECN_MASK) {
- case IPTOS_ECN_CE:
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- KMOD_TCPSTAT_INC(tcps_ecn_ce);
- break;
- case IPTOS_ECN_ECT0:
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- break;
- case IPTOS_ECN_ECT1:
- KMOD_TCPSTAT_INC(tcps_ecn_ect1);
- break;
- }
-
- /* Process a packet differently from RFC3168. */
- cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint);
- /* Congestion experienced. */
- if (ae->flags & TH_ECE) {
- rack_cong_signal(tp, CC_ECN, ae->ack);
- }
- }
+ if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
+ rack_cong_signal(tp, CC_ECN, ae->ack);
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -14457,32 +14434,8 @@
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tp->t_flags2 & TF2_ECN_PERMIT) {
- if (thflags & TH_CWR) {
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- tp->t_flags |= TF_ACKNOW;
- }
- switch (iptos & IPTOS_ECN_MASK) {
- case IPTOS_ECN_CE:
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- KMOD_TCPSTAT_INC(tcps_ecn_ce);
- break;
- case IPTOS_ECN_ECT0:
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- break;
- case IPTOS_ECN_ECT1:
- KMOD_TCPSTAT_INC(tcps_ecn_ect1);
- break;
- }
-
- /* Process a packet differently from RFC3168. */
- cc_ecnpkt_handler(tp, th, iptos);
-
- /* Congestion experienced. */
- if (thflags & TH_ECE) {
- rack_cong_signal(tp, CC_ECN, th->th_ack);
- }
- }
+ if (tcp_ecn_input_segment(tp, thflags, iptos))
+ rack_cong_signal(tp, CC_ECN, th->th_ack);
/*
* If echoed timestamp is later than the current time, fall back to
@@ -14516,13 +14469,7 @@
*/
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
/* Handle parallel SYN for ECN */
- if (!(thflags & TH_ACK) &&
- ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
- ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
- tp->t_flags2 |= TF2_ECN_PERMIT;
- tp->t_flags2 |= TF2_ECN_SND_ECE;
- TCPSTAT_INC(tcps_ecn_shs);
- }
+ tcp_ecn_input_parallel_syn(tp, thflags, iptos);
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
@@ -15879,7 +15826,8 @@
struct tcpopt to;
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
- int32_t slot, segsiz, max_val, tso = 0, error, flags, ulen = 0;
+ int32_t slot, segsiz, max_val, tso = 0, error, ulen = 0;
+ uint16_t flags;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -16006,7 +15954,6 @@
if ((rsm->r_flags & RACK_HAD_PUSH) &&
(len == (rsm->r_end - rsm->r_start)))
flags |= TH_PUSH;
- tcp_set_flags(th, flags);
th->th_win = htons((u_short)(rack->r_ctl.fsb.recwin >> tp->rcv_scale));
if (th->th_win == 0) {
tp->t_sndzerowin++;
@@ -16056,6 +16003,25 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
+ if (TCPS_HAVERCVDSYN(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ int ect = tcp_ecn_output_established(tp, &flags, len);
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE))
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+#ifdef INET6
+ if (rack->r_is_v6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(ect << 20);
+ }
+ else
+#endif
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= ect;
+ }
+ }
+ tcp_set_flags(th, flags);
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
#ifdef INET6
if (rack->r_is_v6) {
@@ -16379,7 +16345,8 @@
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
int cnt_thru = 1;
- int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0;
+ int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
+ uint16_t flags;
uint32_t s_soff;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -16528,37 +16495,23 @@
udp->uh_ulen = htons(ulen);
}
m->m_pkthdr.rcvif = (struct ifnet *)0;
- if (tp->t_state == TCPS_ESTABLISHED &&
+ if (TCPS_HAVERCVDSYN(tp->t_state) &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
- /*
- * If the peer has ECN, mark data packets with ECN capable
- * transmission (ECT). Ignore pure ack packets,
- * retransmissions.
- */
- if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
+ int ect = tcp_ecn_output_established(tp, &flags, len);
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE))
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
#ifdef INET6
- if (rack->r_is_v6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
- }
- else
+ if (rack->r_is_v6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(ect << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= IPTOS_ECN_ECT0;
- }
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- /*
- * Reply with proper ECN notifications.
- * Only set CWR on new data segments.
- */
- if (tp->t_flags2 & TF2_ECN_SND_CWR) {
- flags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
- }
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= ect;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- flags |= TH_ECE;
}
m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
#ifdef INET6
@@ -16786,7 +16739,8 @@
struct socket *so;
uint32_t recwin;
uint32_t sb_offset, s_moff = 0;
- int32_t len, flags, error = 0;
+ int32_t len, error = 0;
+ uint16_t flags;
struct mbuf *m, *s_mb = NULL;
struct mbuf *mb;
uint32_t if_hw_tsomaxsegcount = 0;
@@ -18596,51 +18550,27 @@
* are on a retransmit, we may resend those bits a number of times
* as per RFC 3168.
*/
- if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) {
- if (tp->t_rxtshift >= 1) {
- if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
- flags |= TH_ECE | TH_CWR;
- } else
- flags |= TH_ECE | TH_CWR;
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
+ flags |= tcp_ecn_output_syn_sent(tp);
}
- /* Handle parallel SYN for ECN */
- if ((tp->t_state == TCPS_SYN_RECEIVED) &&
- (tp->t_flags2 & TF2_ECN_SND_ECE)) {
- flags |= TH_ECE;
- tp->t_flags2 &= ~TF2_ECN_SND_ECE;
- }
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ /* Also handle parallel SYN for ECN */
+ if (TCPS_HAVERCVDSYN(tp->t_state) &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
- /*
- * If the peer has ECN, mark data packets with ECN capable
- * transmission (ECT). Ignore pure ack packets,
- * retransmissions.
- */
- if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
- (sack_rxmit == 0)) {
+ int ect = tcp_ecn_output_established(tp, &flags, len);
+ if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+ (tp->t_flags2 & TF2_ECN_SND_ECE))
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
#ifdef INET6
- if (isipv6) {
- ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
- ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
- }
- else
+ if (isipv6) {
+ ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
+ ip6->ip6_flow |= htonl(ect << 20);
+ }
+ else
#endif
- {
- ip->ip_tos &= ~IPTOS_ECN_MASK;
- ip->ip_tos |= IPTOS_ECN_ECT0;
- }
- KMOD_TCPSTAT_INC(tcps_ecn_ect0);
- /*
- * Reply with proper ECN notifications.
- * Only set CWR on new data segments.
- */
- if (tp->t_flags2 & TF2_ECN_SND_CWR) {
- flags |= TH_CWR;
- tp->t_flags2 &= ~TF2_ECN_SND_CWR;
- }
+ {
+ ip->ip_tos &= ~IPTOS_ECN_MASK;
+ ip->ip_tos |= ect;
}
- if (tp->t_flags2 & TF2_ECN_SND_ECE)
- flags |= TH_ECE;
}
/*
* If we are doing retransmissions, then snd_nxt will not reflect
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -89,6 +89,7 @@
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_ecn.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -1027,8 +1028,7 @@
tp->t_flags |= TF_SACK_PERMIT;
}
- if (sc->sc_flags & SCF_ECN)
- tp->t_flags2 |= TF2_ECN_PERMIT;
+ tcp_ecn_syncache_socket(tp, sc);
/*
* Set up MSS and get cached values from tcp_hostcache.
@@ -1743,9 +1743,9 @@
sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
if (ltflags & TF_NOOPT)
sc->sc_flags |= SCF_NOOPT;
- if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
- V_tcp_do_ecn)
- sc->sc_flags |= SCF_ECN;
+ /* ECN Handshake */
+ if (V_tcp_do_ecn)
+ sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos);
if (V_tcp_syncookies)
sc->sc_iss = syncookie_generate(sch, sc);
@@ -1938,10 +1938,7 @@
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
- if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) {
- flags |= TH_ECE;
- TCPSTAT_INC(tcps_ecn_shs);
- }
+ flags = tcp_ecn_syncache_respond(flags, sc);
tcp_set_flags(th, flags);
/* Tack on the TCP options. */

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 5, 2:04 PM (21 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16475410
Default Alt Text
D34162.diff (26 KB)

Event Timeline