Page MenuHomeFreeBSD

D37321.diff
No OneTemporary

D37321.diff

diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -5285,37 +5285,13 @@
}
}
-static void
-bbr_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+static int
+bbr_stopall(struct tcpcb *tp)
{
struct tcp_bbr *bbr;
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
bbr->rc_all_timers_stopped = 1;
- return;
-}
-
-/*
- * stop all timers always returning 0.
- */
-static int
-bbr_stopall(struct tcpcb *tp)
-{
- return (0);
-}
-
-static void
-bbr_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta)
-{
- return;
-}
-
-/*
- * return true if a bbr timer (rack or tlp) is active.
- */
-static int
-bbr_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
return (0);
}
@@ -14168,9 +14144,6 @@
.tfb_tcp_fb_init = bbr_init,
.tfb_tcp_fb_fini = bbr_fini,
.tfb_tcp_timer_stop_all = bbr_stopall,
- .tfb_tcp_timer_activate = bbr_timer_activate,
- .tfb_tcp_timer_active = bbr_timer_active,
- .tfb_tcp_timer_stop = bbr_timer_stop,
.tfb_tcp_rexmit_tmr = bbr_remxt_tmr,
.tfb_tcp_handoff_ok = bbr_handoff_ok,
.tfb_tcp_mtu_chg = bbr_mtu_chg,
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -489,10 +489,6 @@
static int rack_set_sockopt(struct inpcb *inp, struct sockopt *sopt);
static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack);
static int32_t rack_stopall(struct tcpcb *tp);
-static void
-rack_timer_activate(struct tcpcb *tp, uint32_t timer_type,
- uint32_t delta);
-static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type);
static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line);
static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type);
static uint32_t
@@ -5910,9 +5906,6 @@
*/
struct rack_sendmap *rsm;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
counter_u64_add(rack_to_tot, 1);
if (rack->r_state && (rack->r_state != tp->t_state))
rack_set_state(tp, rack);
@@ -6123,9 +6116,6 @@
uint32_t out, avail;
int collapsed_win = 0;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) {
/* Its not time yet */
return (0);
@@ -6312,9 +6302,7 @@
static int
rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
{
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
+
rack_log_to_event(rack, RACK_TO_FRM_DELACK, NULL);
tp->t_flags &= ~TF_DELACK;
tp->t_flags |= TF_ACKNOW;
@@ -6337,9 +6325,6 @@
struct tcptemp *t_template;
int32_t retval = 1;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if (rack->rc_in_persist == 0)
return (0);
if (ctf_progress_timeout_check(tp, false)) {
@@ -6425,9 +6410,6 @@
struct tcptemp *t_template;
struct inpcb *inp = tptoinpcb(tp);
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP;
rack_log_to_event(rack, RACK_TO_FRM_KEEP, NULL);
/*
@@ -6654,9 +6636,6 @@
int32_t retval = 0;
bool isipv6;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if ((tp->t_flags & TF_GPUTINPROG) &&
(tp->t_rxtshift)) {
/*
@@ -7060,12 +7039,6 @@
rack_log_to_cancel(rack, hpts_removed, line, us_cts, &tv, flags_on_entry);
}
-static void
-rack_timer_stop(struct tcpcb *tp, uint32_t timer_type)
-{
- return;
-}
-
static int
rack_stopall(struct tcpcb *tp)
{
@@ -7075,18 +7048,6 @@
return (0);
}
-static void
-rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta)
-{
- return;
-}
-
-static int
-rack_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
- return (0);
-}
-
static void
rack_stop_all_timers(struct tcpcb *tp)
{
@@ -20307,9 +20268,6 @@
.tfb_tcp_fb_init = rack_init,
.tfb_tcp_fb_fini = rack_fini,
.tfb_tcp_timer_stop_all = rack_stopall,
- .tfb_tcp_timer_activate = rack_timer_activate,
- .tfb_tcp_timer_active = rack_timer_active,
- .tfb_tcp_timer_stop = rack_timer_stop,
.tfb_tcp_rexmit_tmr = rack_remxt_tmr,
.tfb_tcp_handoff_ok = rack_handoff_ok,
.tfb_tcp_mtu_chg = rack_mtu_change,
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1194,22 +1194,6 @@
*num_names = 0;
return (EINVAL);
}
- if (blk->tfb_tcp_timer_stop_all ||
- blk->tfb_tcp_timer_activate ||
- blk->tfb_tcp_timer_active ||
- blk->tfb_tcp_timer_stop) {
- /*
- * If you define one timer function you
- * must have them all.
- */
- if ((blk->tfb_tcp_timer_stop_all == NULL) ||
- (blk->tfb_tcp_timer_activate == NULL) ||
- (blk->tfb_tcp_timer_active == NULL) ||
- (blk->tfb_tcp_timer_stop == NULL)) {
- *num_names = 0;
- return (EINVAL);
- }
- }
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
*num_names = 0;
@@ -2227,12 +2211,9 @@
#endif /* INET6 */
V_tcp_mssdflt;
- /* Set up our timeouts. */
- callout_init(&tp->tt_rexmt, 1);
- callout_init(&tp->tt_persist, 1);
- callout_init(&tp->tt_keep, 1);
- callout_init(&tp->tt_2msl, 1);
- callout_init(&tp->tt_delack, 1);
+ callout_init_rw(&tp->t_callout, &inp->inp_lock, CALLOUT_RETURNUNLOCKED);
+ for (int i = 0; i < TT_N; i++)
+ tp->t_timers[i] = SBT_MAX;
switch (V_tcp_do_rfc1323) {
case 0:
@@ -2301,13 +2282,6 @@
if (V_tcp_do_lrd)
tp->t_flags |= TF_LRD;
- /*
- * XXXGL: this self-reference might be pointless. It will go away
- * when the TCP timers are properly locked and could never fire after
- * tcp_discardcb().
- */
- in_pcbref(inp);
-
return (tp);
}
@@ -2341,32 +2315,15 @@
tcp_discardcb(struct tcpcb *tp)
{
struct inpcb *inp = tptoinpcb(tp);
+ struct socket *so = tptosocket(tp);
+#ifdef INET6
+ bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif
INP_WLOCK_ASSERT(inp);
- /*
- * Make sure that all of our timers are stopped before we delete the
- * PCB.
- *
- * If stopping a timer fails, we schedule a discard function in same
- * callout, and the last discard function called will take care of
- * deleting the tcpcb.
- */
- tp->tt_draincnt = 0;
- tcp_timer_stop(tp, TT_REXMT);
- tcp_timer_stop(tp, TT_PERSIST);
- tcp_timer_stop(tp, TT_KEEP);
- tcp_timer_stop(tp, TT_2MSL);
- tcp_timer_stop(tp, TT_DELACK);
+ tcp_timer_stop(tp);
if (tp->t_fb->tfb_tcp_timer_stop_all) {
- /*
- * Call the stop-all function of the methods,
- * this function should call the tcp_timer_stop()
- * method with each of the function specific timeouts.
- * That stop will be called via the tfb_tcp_timer_stop()
- * which should use the async drain function of the
- * callout system (see tcp_var.h).
- */
tp->t_fb->tfb_tcp_timer_stop_all(tp);
}
@@ -2402,23 +2359,7 @@
#endif
CC_ALGO(tp) = NULL;
- if (tp->tt_draincnt == 0)
- tcp_freecb(tp);
-}
-bool
-tcp_freecb(struct tcpcb *tp)
-{
- struct inpcb *inp = tptoinpcb(tp);
- struct socket *so = tptosocket(tp);
-#ifdef INET6
- bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
-#endif
-
- INP_WLOCK_ASSERT(inp);
- MPASS(tp->tt_draincnt == 0);
-
- /* We own the last reference on tcpcb, let's free it. */
#ifdef TCP_BLACKBOX
tcp_log_tcpcbfini(tp);
#endif
@@ -2489,8 +2430,6 @@
}
refcount_release(&tp->t_fb->tfb_refcnt);
-
- return (in_pcbrele_wlocked(inp));
}
/*
@@ -3940,17 +3879,17 @@
(tp->t_flags2 & TF2_ACE_PERMIT) ? 2 : 0;
now = getsbinuptime();
-#define COPYTIMER(ttt) do { \
- if (callout_active(&tp->ttt)) \
- xt->ttt = (tp->ttt.c_time - now) / SBT_1MS; \
- else \
- xt->ttt = 0; \
+#define COPYTIMER(which,where) do { \
+ if (tp->t_timers[which] != SBT_MAX) \
+ xt->where = (tp->t_timers[which] - now) / SBT_1MS; \
+ else \
+ xt->where = 0; \
} while (0)
- COPYTIMER(tt_delack);
- COPYTIMER(tt_rexmt);
- COPYTIMER(tt_persist);
- COPYTIMER(tt_keep);
- COPYTIMER(tt_2msl);
+ COPYTIMER(TT_DELACK, tt_delack);
+ COPYTIMER(TT_REXMT, tt_rexmt);
+ COPYTIMER(TT_PERSIST, tt_persist);
+ COPYTIMER(TT_KEEP, tt_keep);
+ COPYTIMER(TT_2MSL, tt_2msl);
#undef COPYTIMER
xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -145,18 +145,6 @@
#ifdef _KERNEL
-/*
- * Flags for the tcpcb's tt_flags field.
- */
-#define TT_DELACK 0x0001
-#define TT_REXMT 0x0002
-#define TT_PERSIST 0x0004
-#define TT_KEEP 0x0008
-#define TT_2MSL 0x0010
-#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
-
-#define TT_STOPPED 0x00010000
-
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
@@ -205,13 +193,6 @@
VNET_DECLARE(int, tcp_msl);
#define V_tcp_msl VNET(tcp_msl)
-void tcp_timer_init(void);
-void tcp_timer_2msl(void *xtp);
-void tcp_timer_keep(void *xtp);
-void tcp_timer_persist(void *xtp);
-void tcp_timer_rexmt(void *xtp);
-void tcp_timer_delack(void *xtp);
-
#endif /* _KERNEL */
#endif /* !_NETINET_TCP_TIMER_H_ */
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -243,104 +243,86 @@
/*
* TCP timer processing.
+ *
+ * Each connection has 5 timers associated with it, which can be scheduled
+ * simultaneously. They all are serviced by one callout tcp_timer_enter().
+ * This function executes the next timer via tcp_timersw[] vector. Each
+ * timer is supposed to return 'true' unless the connection was destroyed.
+ * On 'true' tcp_timer_enter() will schedule the callout for the next timer.
*/
-void
-tcp_timer_delack(void *xtp)
-{
- struct epoch_tracker et;
- struct tcpcb *tp = xtp;
- struct inpcb *inp = tptoinpcb(tp);
-
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_delack) ||
- !callout_active(&tp->tt_delack)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_delack);
- if ((inp->inp_flags & INP_DROPPED) != 0) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- tp->t_flags |= TF_ACKNOW;
- TCPSTAT_INC(tcps_delack);
- NET_EPOCH_ENTER(et);
- (void) tcp_output_unlock(tp);
- NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
-}
+typedef bool tcp_timer_t(struct tcpcb *);
+static tcp_timer_t tcp_timer_delack;
+static tcp_timer_t tcp_timer_2msl;
+static tcp_timer_t tcp_timer_keep;
+static tcp_timer_t tcp_timer_persist;
+static tcp_timer_t tcp_timer_rexmt;
+
+static tcp_timer_t * const tcp_timersw[TT_N] = {
+ [TT_DELACK] = tcp_timer_delack,
+ [TT_REXMT] = tcp_timer_rexmt,
+ [TT_PERSIST] = tcp_timer_persist,
+ [TT_KEEP] = tcp_timer_keep,
+ [TT_2MSL] = tcp_timer_2msl,
+};
/*
- * Call tcp_close() from a callout context.
+ * tcp_output_locked() is a timer specific variation of call to tcp_output(),
+ * see tcp_var.h for the rest. It handles drop request from advanced stacks,
+ * but keeps tcpcb locked unless tcp_drop() destroyed it.
+ * Returns true if tcpcb is valid and locked.
*/
-static void
-tcp_timer_close(struct tcpcb *tp)
+static inline bool
+tcp_output_locked(struct tcpcb *tp)
{
- struct epoch_tracker et;
- struct inpcb *inp = tptoinpcb(tp);
+ int rv;
- INP_WLOCK_ASSERT(inp);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
- NET_EPOCH_ENTER(et);
- tp = tcp_close(tp);
- NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
+ if ((rv = tp->t_fb->tfb_tcp_output(tp)) < 0) {
+ KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
+ ("TCP stack %s requested tcp_drop(%p)",
+ tp->t_fb->tfb_tcp_block_name, tp));
+ tp = tcp_drop(tp, rv);
+ }
+
+ return (tp != NULL);
}
-/*
- * Call tcp_drop() from a callout context.
- */
-static void
-tcp_timer_drop(struct tcpcb *tp)
+static bool
+tcp_timer_delack(struct tcpcb *tp)
{
struct epoch_tracker et;
+#if defined(INVARIANTS) || defined(VIMAGE)
struct inpcb *inp = tptoinpcb(tp);
+#endif
+ bool rv;
INP_WLOCK_ASSERT(inp);
+ CURVNET_SET(inp->inp_vnet);
+ tp->t_flags |= TF_ACKNOW;
+ TCPSTAT_INC(tcps_delack);
NET_EPOCH_ENTER(et);
- tp = tcp_drop(tp, ETIMEDOUT);
+ rv = tcp_output_locked(tp);
NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
+
+ return (rv);
}
-void
-tcp_timer_2msl(void *xtp)
+static bool
+tcp_timer_2msl(struct tcpcb *tp)
{
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
-#ifdef TCPDEBUG
- int ostate;
+ bool close = false;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
tcp_log_end_status(tp, TCP_EI_STATUS_2MSL);
tcp_free_sackholes(tp);
- if (callout_pending(&tp->tt_2msl) ||
- !callout_active(&tp->tt_2msl)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_2msl);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
@@ -354,69 +336,41 @@
* XXXGL: check if inp_socket shall always be !NULL here?
*/
if (tp->t_state == TCPS_TIME_WAIT) {
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
+ close = true;
} else if (tp->t_state == TCPS_FIN_WAIT_2 &&
tcp_fast_finwait2_recycle && inp->inp_socket &&
(inp->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
+ close = true;
} else {
- if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
- callout_reset(&tp->tt_2msl,
- TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
- } else {
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
- }
+ if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+ tcp_timer_activate(tp, TT_2MSL, TP_KEEPINTVL(tp));
+ else
+ close = true;
}
+ if (close) {
+ struct epoch_tracker et;
-#ifdef TCPDEBUG
- if (tptosocket(tp)->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
-
- INP_WUNLOCK(inp);
+ NET_EPOCH_ENTER(et);
+ tp = tcp_close(tp);
+ NET_EPOCH_EXIT(et);
+ }
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
-void
-tcp_timer_keep(void *xtp)
+static bool
+tcp_timer_keep(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
struct tcptemp *t_template;
-#ifdef TCPDEBUG
- int ostate;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_keep) ||
- !callout_active(&tp->tt_keep)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_keep);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
-
/*
* Because we don't regularly reset the keepalive callout in
* the ESTABLISHED state, it may be that we don't actually need
@@ -428,11 +382,10 @@
idletime = ticks - tp->t_rcvtime;
if (idletime < TP_KEEPIDLE(tp)) {
- callout_reset(&tp->tt_keep,
- TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
- INP_WUNLOCK(inp);
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp) - idletime);
CURVNET_RESTORE();
- return;
+ return (true);
}
}
@@ -470,38 +423,22 @@
NET_EPOCH_EXIT(et);
free(t_template, M_TEMP);
}
- callout_reset(&tp->tt_keep, TP_KEEPINTVL(tp),
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINTVL(tp));
} else
- callout_reset(&tp->tt_keep, TP_KEEPIDLE(tp),
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
-#ifdef TCPDEBUG
- if (inp->inp_socket->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- INP_WUNLOCK(inp);
CURVNET_RESTORE();
- return;
+ return (true);
dropit:
TCPSTAT_INC(tcps_keepdrops);
NET_EPOCH_ENTER(et);
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
tp = tcp_drop(tp, ETIMEDOUT);
-
-#ifdef TCPDEBUG
- if (tp != NULL && (tptosocket(tp)->so_options & SO_DEBUG))
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
/*
@@ -529,37 +466,19 @@
return true;
}
-void
-tcp_timer_persist(void *xtp)
+static bool
+tcp_timer_persist(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
+#if defined(INVARIANTS) || defined(VIMAGE)
struct inpcb *inp = tptoinpcb(tp);
- bool progdrop;
- int outrv;
-#ifdef TCPDEBUG
- int ostate;
-
- ostate = tp->t_state;
#endif
+ bool progdrop, rv;
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
+ INP_WLOCK_ASSERT(inp);
- if (callout_pending(&tp->tt_persist) ||
- !callout_active(&tp->tt_persist)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_persist);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ CURVNET_SET(inp->inp_vnet);
/*
* Persistence timer into zero window.
* Force a byte to be output, if possible.
@@ -581,9 +500,7 @@
if (!progdrop)
TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
- tcp_timer_drop(tp);
- CURVNET_RESTORE();
- return;
+ goto dropit;
}
/*
* If the user has closed the socket then drop a persisting
@@ -593,57 +510,39 @@
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
- tcp_timer_drop(tp);
- CURVNET_RESTORE();
- return;
+ goto dropit;
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
NET_EPOCH_ENTER(et);
- outrv = tcp_output_nodrop(tp);
- tp->t_flags &= ~TF_FORCEDATA;
+ if ((rv = tcp_output_locked(tp)))
+ tp->t_flags &= ~TF_FORCEDATA;
+ NET_EPOCH_EXIT(et);
+ CURVNET_RESTORE();
-#ifdef TCPDEBUG
- if (tp != NULL && tptosocket(tp)->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- (void) tcp_unlock_or_drop(tp, outrv);
+ return (rv);
+
+dropit:
+ NET_EPOCH_ENTER(et);
+ tp = tcp_drop(tp, ETIMEDOUT);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
-void
-tcp_timer_rexmt(void * xtp)
+static bool
+tcp_timer_rexmt(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
- int rexmt, outrv;
- bool isipv6;
-#ifdef TCPDEBUG
- int ostate;
+ int rexmt;
+ bool isipv6, rv;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_rexmt) ||
- !callout_active(&tp->tt_rexmt)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_rexmt);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
tcp_free_sackholes(tp);
TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false);
if (tp->t_fb->tfb_tcp_rexmit_tmr) {
@@ -664,9 +563,12 @@
TCPSTAT_INC(tcps_timeoutdrop);
tp->t_rxtshift = TCP_MAXRXTSHIFT;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
- tcp_timer_drop(tp);
+ NET_EPOCH_ENTER(et);
+ tp = tcp_drop(tp, ETIMEDOUT);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
- return;
+
+ return (tp != NULL);
}
if (tp->t_state == TCPS_SYN_SENT) {
/*
@@ -883,159 +785,131 @@
cc_cong_signal(tp, NULL, CC_RTO);
NET_EPOCH_ENTER(et);
- outrv = tcp_output_nodrop(tp);
-#ifdef TCPDEBUG
- if (tp != NULL && (tptosocket(tp)->so_options & SO_DEBUG))
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- (void) tcp_unlock_or_drop(tp, outrv);
+ rv = tcp_output_locked(tp);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+
+ return (rv);
+}
+
+static inline tt_which
+tcp_timer_next(struct tcpcb *tp, sbintime_t *precision)
+{
+ tt_which i, rv;
+ sbintime_t after, before;
+
+ for (i = 0, rv = TT_N, after = before = SBT_MAX; i < TT_N; i++) {
+ if (tp->t_timers[i] < after) {
+ after = tp->t_timers[i];
+ rv = i;
+ }
+ before = MIN(before, tp->t_timers[i] + tp->t_precisions[i]);
+ }
+ if (precision != NULL)
+ *precision = before - after;
+
+ return (rv);
+}
+
+static void
+tcp_timer_enter(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct inpcb *inp = tptoinpcb(tp);
+ sbintime_t precision;
+ tt_which which;
+
+ INP_WLOCK_ASSERT(inp);
+ MPASS((curthread->td_pflags & TDP_INTCPCALLOUT) == 0);
+
+ curthread->td_pflags |= TDP_INTCPCALLOUT;
+
+ which = tcp_timer_next(tp, NULL);
+ MPASS(which < TT_N);
+ tp->t_timers[which] = SBT_MAX;
+ tp->t_precisions[which] = 0;
+
+ if (tcp_timersw[which](tp)) {
+ if ((which = tcp_timer_next(tp, &precision)) != TT_N) {
+ callout_reset_sbt_on(&tp->t_callout,
+ tp->t_timers[which], precision, tcp_timer_enter,
+ tp, inp_to_cpuid(inp), C_ABSOLUTE);
+ }
+ INP_WUNLOCK(inp);
+ }
+
+ curthread->td_pflags &= ~TDP_INTCPCALLOUT;
}
+/*
+ * Activate or stop (delta == 0) a TCP timer.
+ */
void
-tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, tt_which which, u_int delta)
{
- struct callout *t_callout;
- callout_func_t *f_callout;
struct inpcb *inp = tptoinpcb(tp);
- int cpu = inp_to_cpuid(inp);
+ sbintime_t precision;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
return;
#endif
- if (tp->tt_flags & TT_STOPPED)
- return;
+ INP_WLOCK_ASSERT(inp);
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- f_callout = tcp_timer_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- f_callout = tcp_timer_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- f_callout = tcp_timer_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- f_callout = tcp_timer_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- f_callout = tcp_timer_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_activate) {
- tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
- return;
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
- if (delta == 0) {
- callout_stop(t_callout);
- } else {
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
- }
-}
+ if (delta > 0)
+ callout_when(tick_sbt * delta, 0, C_HARDCLOCK,
+ &tp->t_timers[which], &tp->t_precisions[which]);
+ else
+ tp->t_timers[which] = SBT_MAX;
-int
-tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
- struct callout *t_callout;
-
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_active) {
- return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
- return callout_active(t_callout);
+ if ((which = tcp_timer_next(tp, &precision)) != TT_N)
+ callout_reset_sbt_on(&tp->t_callout, tp->t_timers[which],
+ precision, tcp_timer_enter, tp, inp_to_cpuid(inp),
+ C_ABSOLUTE);
+ else
+ callout_stop(&tp->t_callout);
}
-static void
-tcp_timer_discard(void *ptp)
+bool
+tcp_timer_active(struct tcpcb *tp, tt_which which)
{
- struct epoch_tracker et;
- struct tcpcb *tp = (struct tcpcb *)ptp;
- struct inpcb *inp = tptoinpcb(tp);
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
- NET_EPOCH_ENTER(et);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
- KASSERT((tp->tt_flags & TT_STOPPED) != 0,
- ("%s: tcpcb has to be stopped here", __func__));
- if (--tp->tt_draincnt > 0 ||
- tcp_freecb(tp) == false)
- INP_WUNLOCK(inp);
- NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
+ return (tp->t_timers[which] != SBT_MAX);
}
+/*
+ * Stop all timers associated with tcpcb.
+ *
+ * Called only on tcpcb destruction. The tcpcb shall already be dropped from
+ * the pcb lookup database and socket is not losing the last reference.
+ *
+ * XXXGL: unfortunately our callout(9) is not able to fully stop a locked
+ * callout even when only two threads are involved: the callout itself and the
+ * thread that does callout_stop(). See where softclock_call_cc() swaps the
+ * callwheel lock to callout lock and then checks cc_exec_cancel(). This is
+ * the race window. If it happens, the tcp_timer_enter() won't be executed,
+ * however pcb lock will be locked and released, hence we can't free memory.
+ * Until callout(9) is improved, just keep retrying. In my profiling I've seen
+ * such an event happen less than once per hour with 20-30 Gbit/s of traffic.
+ */
void
-tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+tcp_timer_stop(struct tcpcb *tp)
{
- struct callout *t_callout;
-
- tp->tt_flags |= TT_STOPPED;
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_stop) {
- /*
- * XXXrrs we need to look at this with the
- * stop case below (flags).
- */
- tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
- return;
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
+ struct inpcb *inp = tptoinpcb(tp);
- if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
- /*
- * Can't stop the callout, defer tcpcb actual deletion
- * to the last one. We do this using the async drain
- * function and incrementing the count in
- */
- tp->tt_draincnt++;
+ INP_WLOCK_ASSERT(inp);
+
+ if (curthread->td_pflags & TDP_INTCPCALLOUT) {
+ int stopped __diagused;
+
+ stopped = callout_stop(&tp->t_callout);
+ MPASS(stopped == 0);
+ } else while(__predict_false(callout_stop(&tp->t_callout) == 0)) {
+ INP_WUNLOCK(inp);
+ kern_yield(PRI_UNCHANGED);
+ INP_WLOCK(inp);
}
}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -3072,10 +3072,8 @@
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
- db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
- &tp->tt_rexmt, &tp->tt_persist, &tp->tt_keep);
- db_printf("tt_2msl: %p tt_delack: %p\n", &tp->tt_2msl,
- &tp->tt_delack);
+ db_printf("t_callout: %p t_timers: %p\n",
+ &tp->t_callout, &tp->t_timers);
db_print_indent(indent);
db_printf("t_state: %d (", tp->t_state);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -126,6 +126,15 @@
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
+typedef enum {
+ TT_DELACK = 0,
+ TT_REXMT,
+ TT_PERSIST,
+ TT_KEEP,
+ TT_2MSL,
+ TT_N,
+} tt_which;
+
/*
* Tcp control block, one per tcp connection.
*/
@@ -137,13 +146,9 @@
struct tcp_function_block *t_fb;/* TCP function call block */
void *t_fb_ptr; /* Pointer to t_fb specific data */
- struct callout tt_rexmt; /* retransmit timer */
- struct callout tt_persist; /* retransmit persistence */
- struct callout tt_keep; /* keepalive */
- struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
- struct callout tt_delack; /* delayed ACK timer */
- uint32_t tt_flags; /* Timers flags */
- uint32_t tt_draincnt; /* Count being drained */
+ struct callout t_callout;
+ sbintime_t t_timers[TT_N];
+ sbintime_t t_precisions[TT_N];
uint32_t t_maxseg:24, /* maximum segment size */
t_logstate:8; /* State of "black box" logging */
@@ -370,10 +375,6 @@
void (*tfb_tcp_fb_fini)(struct tcpcb *, int);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
- void (*tfb_tcp_timer_activate)(struct tcpcb *,
- uint32_t, u_int);
- int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
- void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
void (*tfb_tcp_mtu_chg)(struct tcpcb *);
@@ -1086,7 +1087,6 @@
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
-bool tcp_freecb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
int tcp_ctloutput(struct socket *, struct sockopt *);
void tcp_fini(void *);
@@ -1186,9 +1186,9 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
-void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
-int tcp_timer_active(struct tcpcb *, uint32_t);
-void tcp_timer_stop(struct tcpcb *, uint32_t);
+void tcp_timer_activate(struct tcpcb *, tt_which, u_int);
+bool tcp_timer_active(struct tcpcb *, tt_which);
+void tcp_timer_stop(struct tcpcb *);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
int inp_to_cpuid(struct inpcb *inp);
/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -557,7 +557,7 @@
#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
#define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */
#define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */
-#define TDP_UNUSED0 0x20000000 /* UNUSED */
+#define TDP_INTCPCALLOUT 0x20000000 /* used by netinet/tcp_timer.c */
#define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */
#define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */

File Metadata

Mime Type
text/plain
Expires
Sat, Jan 25, 7:22 AM (20 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16130585
Default Alt Text
D37321.diff (29 KB)

Event Timeline