Page MenuHomeFreeBSD

D14993.id41201.diff
No OneTemporary

D14993.id41201.diff

Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -176,6 +176,7 @@
device */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
+#define TCP_MAXUNACKTIME 68 /* max time without making progress (sec) */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1874,11 +1874,21 @@
&tcp_savetcp, 0);
#endif
TCP_PROBE3(debug__input, tp, th, m);
+ /*
+ * Clear t_acktime if remote side has ACKd
+ * all data in the socket buffer.
+ * Otherwise, update t_acktime if we received
+ * a sufficiently large ACK.
+ */
+ if (sbavail(&so->so_snd) == 0)
+ tp->t_acktime = 0;
+ else if (acked > 1)
+ tp->t_acktime = ticks;
if (tp->snd_una == tp->snd_max)
tcp_timer_activate(tp, TT_REXMT, 0);
else if (!tcp_timer_active(tp, TT_PERSIST))
tcp_timer_activate(tp, TT_REXMT,
- tp->t_rxtcur);
+ TP_RXTCUR(tp));
sowwakeup(so);
if (sbavail(&so->so_snd))
(void) tp->t_fb->tfb_tcp_output(tp);
@@ -2073,6 +2083,7 @@
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_acktime = ticks;
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
thflags &= ~TH_SYN;
@@ -2452,6 +2463,7 @@
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_acktime = ticks;
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
} else {
@@ -2818,7 +2830,21 @@
tcp_xmit_timer(tp, ticks - tp->t_rtttime);
}
+ SOCKBUF_LOCK(&so->so_snd);
/*
+ * Clear t_acktime if remote side has ACKd all data in the
+ * socket buffer and FIN (if applicable).
+ * Otherwise, update t_acktime if we received a sufficiently
+ * large ACK.
+ */
+ if ((tp->t_state <= TCPS_CLOSE_WAIT &&
+ acked == sbavail(&so->so_snd)) ||
+ acked > sbavail(&so->so_snd))
+ tp->t_acktime = 0;
+ else if (acked > 1)
+ tp->t_acktime = ticks;
+
+ /*
* If all outstanding data is acked, stop retransmit
* timer and remember to restart (more output or persist).
* If there is more data to be acked, restart retransmit
@@ -2828,14 +2854,16 @@
tcp_timer_activate(tp, TT_REXMT, 0);
needoutput = 1;
} else if (!tcp_timer_active(tp, TT_PERSIST))
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
/*
* If no data (only SYN) was ACK'd,
* skip rest of ACK processing.
*/
- if (acked == 0)
+ if (acked == 0) {
+ SOCKBUF_UNLOCK(&so->so_snd);
goto step6;
+ }
/*
* Let the congestion control algorithm update congestion
@@ -2844,7 +2872,6 @@
*/
cc_ack_received(tp, th, nsegs, CC_ACK);
- SOCKBUF_LOCK(&so->so_snd);
if (acked > sbavail(&so->so_snd)) {
if (tp->snd_wnd >= sbavail(&so->so_snd))
tp->snd_wnd -= sbavail(&so->so_snd);
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -138,7 +138,8 @@
* non-ACK.
*/
#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \
- KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
+ KASSERT(((len) == 0 && \
+ ((th_flags) & (TH_SYN | TH_FIN | TH_RST)) == 0) || \
tcp_timer_active((tp), TT_REXMT) || \
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
@@ -482,12 +483,12 @@
*/
len = 0;
if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
- (off < (int) sbavail(&so->so_snd))) {
+ (off < (int) sbavail(&so->so_snd)) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
- if (!tcp_timer_active(tp, TT_PERSIST))
- tcp_setpersist(tp);
+ tcp_setpersist(tp);
}
}
@@ -691,7 +692,7 @@
SEQ_GT(tp->snd_max, tp->snd_una) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
goto just_return;
}
/*
@@ -1483,6 +1484,14 @@
goto timer;
tp->snd_nxt += len;
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ /*
+ * Update "made progress" indication if we just
+ * sent new data while none was outstanding.
+ * This resets the time the remote side has to
+ * ACK the new data.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_acktime = ticks;
tp->snd_max = tp->snd_nxt;
/*
* Time this transmission if not a retransmission and
@@ -1511,7 +1520,7 @@
tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
} else if (len == 0 && sbavail(&so->so_snd) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
@@ -1659,15 +1668,29 @@
{
int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
int tt;
+ int maxunacktime;
tp->t_flags &= ~TF_PREVVALID;
if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
/*
+ * If the state is already closed, don't bother.
+ */
+ if (tp->t_state == TCPS_CLOSED)
+ return;
+
+ /*
* Start/restart persistence timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
tcp_persmin, tcp_persmax);
+ if (TP_MAXUNACKTIME(tp) && tp->t_acktime) {
+ maxunacktime = tp->t_acktime + TP_MAXUNACKTIME(tp) - ticks;
+ if (maxunacktime < 1)
+ maxunacktime = 1;
+ if (maxunacktime < tt)
+ tt = maxunacktime;
+ }
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -919,6 +919,7 @@
/*
* Copy and activate timers.
*/
+ tp->t_maxunacktime = sototcpcb(lso)->t_maxunacktime;
tp->t_keepinit = sototcpcb(lso)->t_keepinit;
tp->t_keepidle = sototcpcb(lso)->t_keepidle;
tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
Index: sys/netinet/tcp_timer.h
===================================================================
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -87,6 +87,8 @@
#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
+#define TCPTV_MAXUNACKTIME 0 /* max time without making progress */
+
#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
/*
@@ -181,7 +183,18 @@
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+#define TP_MAXUNACKTIME(tp) \
+ ((tp)->t_maxunacktime ? (tp)->t_maxunacktime : tcp_maxunacktime)
+/*
+ * Time until the retransmit timer should fire: the earlier of t_rxtcur and
+ * the maxunacktime deadline, signed so a passed deadline clamps to 1.
+ */
+#define TP_RXTCUR(tp) \
+ ((TP_MAXUNACKTIME(tp) == 0 || (tp)->t_acktime == 0) ? (tp)->t_rxtcur : \
+ imax(1, imin((tp)->t_rxtcur, \
+ (int)((tp)->t_acktime + TP_MAXUNACKTIME(tp) - ticks))))
+
extern int tcp_persmin; /* minimum persist interval */
extern int tcp_persmax; /* maximum persist interval */
extern int tcp_keepinit; /* time to establish connection */
@@ -189,6 +202,7 @@
extern int tcp_keepintvl; /* time between keepalive probes */
extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
+extern int tcp_maxunacktime; /* max time without making progress */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
extern int tcp_rexmit_slop;
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -69,6 +69,7 @@
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_log_buf.h>
+#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/cc/cc.h>
@@ -144,6 +145,11 @@
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+int tcp_maxunacktime = TCPTV_MAXUNACKTIME;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_maxunacktime, 0, sysctl_msec_to_ticks, "I",
+ "Maximum time (in ms) that a session can linger without making progress");
+
VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
CTLFLAG_RW|CTLFLAG_VNET,
@@ -533,11 +539,37 @@
CURVNET_RESTORE();
}
+/*
+ * Report whether this session has gone longer than its configured maximum
+ * without a substantive acknowledgement; counts a progress drop when so.
+ */
+static bool
+tcp_maxunacktime_check(struct tcpcb *tp)
+{
+ u_int limit = TP_MAXUNACKTIME(tp);
+
+ /*
+ * Nothing to enforce when the limit is disabled or when no
+ * acknowledgement time is currently being tracked.
+ */
+ if (limit == 0 || tp->t_acktime == 0)
+ return (false);
+
+ /* Still inside the allowed window? */
+ if (TSTMP_GT(limit + tp->t_acktime, (u_int)ticks))
+ return (false);
+
+ /* The deadline has passed: account for the drop. */
+ TCPSTAT_INC(tcps_progdrops);
+ return (true);
+}
+
void
tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ bool progdrop;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -572,11 +604,15 @@
* backoff, drop the connection if the idle time
* (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
+ * Also, drop the connection if we haven't been making
+ * progress.
*/
- if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ progdrop = tcp_maxunacktime_check(tp);
+ if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
- ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
- TCPSTAT_INC(tcps_persistdrop);
+ ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
+ if (!progdrop)
+ TCPSTAT_INC(tcps_persistdrop);
if (tcp_inpinfo_lock_add(inp)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
@@ -654,10 +690,15 @@
* Retransmission timer went off. Message has not
* been acked within retransmit interval. Back off
* to a longer retransmit interval and retransmit one segment.
+ *
+ * If we've either exceeded the maximum number of retransmissions,
+ * or we've gone long enough without making progress, then drop
+ * the session.
*/
- if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
+ TCPSTAT_INC(tcps_timeoutdrop);
tp->t_rxtshift = TCP_MAXRXTSHIFT;
- TCPSTAT_INC(tcps_timeoutdrop);
if (tcp_inpinfo_lock_add(inp)) {
tcp_inpinfo_lock_del(inp, tp);
goto out;
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -929,6 +929,8 @@
m_freem(control); /* empty control, just free it */
}
if (!(flags & PRUS_OOB)) {
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream(&so->so_snd, m, flags);
if (nam && tp->t_state < TCPS_SYN_SENT) {
/*
@@ -991,6 +993,8 @@
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream_locked(&so->so_snd, m, flags);
SOCKBUF_UNLOCK(&so->so_snd);
if (nam && tp->t_state < TCPS_SYN_SENT) {
@@ -1722,6 +1726,7 @@
INP_WUNLOCK(inp);
break;
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
@@ -1738,6 +1743,10 @@
INP_WLOCK_RECHECK(inp);
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ tp->t_maxunacktime = ui;
+ break;
+
case TCP_KEEPIDLE:
tp->t_keepidle = ui;
/*
@@ -1947,11 +1956,16 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, buf, len + 1);
break;
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
case TCP_KEEPCNT:
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ /* Report in seconds; do not fall into TCP_KEEPIDLE. */
+ ui = TP_MAXUNACKTIME(tp) / hz;
+ break;
case TCP_KEEPIDLE:
ui = TP_KEEPIDLE(tp) / hz;
break;
@@ -2150,6 +2164,8 @@
tcp_state_change(tp, TCPS_LAST_ACK);
break;
}
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
if (tp->t_state >= TCPS_FIN_WAIT_2) {
soisdisconnected(tp->t_inpcb->inp_socket);
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -124,6 +124,8 @@
tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
u_int t_rcvtime; /* inactivity time */
+ u_int t_acktime; /* last time we received a "real" ACK */
+ u_int t_maxunacktime; /* max time without making progress */
u_int t_starttime; /* time connection was established */
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
@@ -495,6 +497,7 @@
uint64_t tcps_keeptimeo; /* keepalive timeouts */
uint64_t tcps_keepprobe; /* keepalive probes sent */
uint64_t tcps_keepdrops; /* connections dropped in keepalive */
+ uint64_t tcps_progdrops; /* drops due to no progress */
uint64_t tcps_sndtotal; /* total packets sent */
uint64_t tcps_sndpack; /* data packets sent */
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -678,6 +678,8 @@
"{N:/keepalive probe%s sent}\n");
p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} "
"{N:/connection%s dropped by keepalive}\n");
+ p(tcps_progdrops, "\t{:connections-dropped-due-to-progress-time/%ju} "
+ "{N:/connection%s dropped due to exceeding progress time}\n");
p(tcps_predack, "\t{:ack-header-predictions/%ju} "
"{N:/correct ACK header prediction%s}\n");
p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} "
Index: usr.bin/systat/tcp.c
===================================================================
--- usr.bin/systat/tcp.c
+++ usr.bin/systat/tcp.c
@@ -123,8 +123,8 @@
L(5, "- in embryonic state"); R(5, "- ack-only");
L(6, "- on retransmit timeout"); R(6, "- window probes");
L(7, "- by keepalive"); R(7, "- window updates");
- L(8, "- from listen queue"); R(8, "- urgent data only");
- R(9, "- control");
+ L(8, "- exceeded progress time"); R(8, "- urgent data only");
+ L(9, "- from listen queue"); R(9, "- control");
R(10, "- resends by PMTU discovery");
L(11, "TCP Timers"); R(11, "total packets received");
L(12, "potential rtt updates"); R(12, "- in sequence");
@@ -177,6 +177,7 @@
DO(tcps_keeptimeo);
DO(tcps_keepprobe);
DO(tcps_keepdrops);
+ DO(tcps_progdrops);
DO(tcps_sndtotal);
DO(tcps_sndpack);
@@ -246,8 +247,8 @@
L(5, tcps_conndrops); R(5, tcps_sndacks);
L(6, tcps_timeoutdrop); R(6, tcps_sndprobe);
L(7, tcps_keepdrops); R(7, tcps_sndwinup);
- L(8, tcps_listendrop); R(8, tcps_sndurg);
- R(9, tcps_sndctrl);
+ L(8, tcps_progdrops); R(8, tcps_sndurg);
+ L(9, tcps_listendrop); R(9, tcps_sndctrl);
R(10, tcps_mturesent);
R(11, tcps_rcvtotal);
L(12, tcps_segstimed); R(12, tcps_rcvpack);

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 1, 12:11 AM (14 h, 36 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
13147082
Default Alt Text
D14993.id41201.diff (15 KB)

Event Timeline