Page MenuHomeFreeBSD

D34573.diff
No OneTemporary

D34573.diff

diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -420,7 +420,7 @@
uint32_t tsused);
static void
rack_cong_signal(struct tcpcb *tp,
- uint32_t type, uint32_t ack);
+ uint32_t type, uint32_t ack, int );
static void rack_counter_destroy(void);
static int
rack_ctloutput(struct inpcb *inp, struct sockopt *sopt);
@@ -562,7 +562,6 @@
tp = rack->rc_tp;
if (tp->cc_algo == NULL) {
/* Tcb is leaving */
- printf("No cc algorithm?\n");
return;
}
rack->rc_pacing_cc_set = 1;
@@ -2120,6 +2119,7 @@
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
+ log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
log.u_bbr.lt_epoch = rack->rc_tp->t_rxtshift;
log.u_bbr.lost = rack_rto_min;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
@@ -2510,6 +2510,7 @@
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
+ log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2616,7 +2617,7 @@
}
static void
-rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd)
+rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd, int line)
{
if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) {
union tcp_log_stackspecific log;
@@ -2632,6 +2633,7 @@
log.u_bbr.flex4 = rack->r_ctl.rc_prr_delivered;
log.u_bbr.flex5 = rack->r_ctl.rc_sacked;
log.u_bbr.flex6 = rack->r_ctl.rc_holes_rxt;
+ log.u_bbr.flex7 = line;
log.u_bbr.flex8 = frm;
log.u_bbr.pkts_out = orig_cwnd;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
@@ -4702,9 +4704,9 @@
rack->r_ctl.rc_prr_sndcnt);
}
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 1, 0);
+ rack_log_to_prr(rack, 1, 0, __LINE__);
}
- rack_log_to_prr(rack, 14, orig_cwnd);
+ rack_log_to_prr(rack, 14, orig_cwnd, __LINE__);
tp->snd_recover = tp->snd_una;
if (rack->r_ctl.dsack_persist) {
rack->r_ctl.dsack_persist--;
@@ -4717,7 +4719,7 @@
}
static void
-rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack)
+rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line)
{
struct tcp_rack *rack;
uint32_t ssthresh_enter, cwnd_enter, in_rec_at_entry, orig_cwnd;
@@ -4742,7 +4744,7 @@
rack->r_ctl.rc_prr_out = 0;
if (rack->rack_no_prr == 0) {
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 2, in_rec_at_entry);
+ rack_log_to_prr(rack, 2, in_rec_at_entry, line);
}
rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una;
tp->snd_recover = tp->snd_max;
@@ -4772,7 +4774,7 @@
ctf_fixed_maxseg(tp)) * ctf_fixed_maxseg(tp);
orig_cwnd = tp->snd_cwnd;
tp->snd_cwnd = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 16, orig_cwnd);
+ rack_log_to_prr(rack, 16, orig_cwnd, line);
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
break;
@@ -4800,7 +4802,7 @@
CC_ALGO(tp)->cong_signal(tp->ccv, type);
}
if ((in_rec_at_entry == 0) && IN_RECOVERY(tp->t_flags)) {
- rack_log_to_prr(rack, 15, cwnd_enter);
+ rack_log_to_prr(rack, 15, cwnd_enter, line);
rack->r_ctl.dsack_byte_cnt = 0;
rack->r_ctl.retran_during_recovery = 0;
rack->r_ctl.rc_cwnd_at_erec = cwnd_enter;
@@ -5105,6 +5107,7 @@
if (rsm == NULL)
return (NULL);
+
if (rsm->r_flags & RACK_ACKED) {
rsm = rack_find_lowest_rsm(rack);
if (rsm == NULL)
@@ -5120,7 +5123,7 @@
return (NULL);
}
/* Ok if we reach here we are over-due and this guy can be sent */
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
return (rsm);
}
@@ -5707,7 +5710,7 @@
* real pacing. And the tlp or rxt is smaller
* than the pacing calculation. Lets not
* pace that long since we know the calculation
- * so far is not accurate.
+ * so far is not accurate.
*/
slot = hpts_timeout;
}
@@ -6069,7 +6072,7 @@
so = tp->t_inpcb->inp_socket;
avail = sbavail(&so->so_snd);
out = tp->snd_max - tp->snd_una;
- if (out > tp->snd_wnd) {
+ if ((out > tp->snd_wnd) || rack->rc_has_collapsed) {
/* special case, we need a retransmission */
collapsed_win = 1;
goto need_retran;
@@ -6123,7 +6126,7 @@
if (out + amm <= tp->snd_wnd) {
rack->r_ctl.rc_prr_sndcnt = amm;
rack->r_ctl.rc_tlp_new_data = amm;
- rack_log_to_prr(rack, 4, 0);
+ rack_log_to_prr(rack, 4, 0, __LINE__);
}
} else
goto need_retran;
@@ -6467,7 +6470,7 @@
if (rack->r_ctl.rc_resend != NULL)
rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT;
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 6, 0);
+ rack_log_to_prr(rack, 6, 0, __LINE__);
rack->r_timer_override = 1;
if ((((tp->t_flags & TF_SACK_PERMIT) == 0)
#ifdef NETFLIX_EXP_DETECTION
@@ -6835,7 +6838,7 @@
tp->snd_recover = tp->snd_max;
tp->t_flags |= TF_ACKNOW;
tp->t_rtttime = 0;
- rack_cong_signal(tp, CC_RTO, tp->snd_una);
+ rack_cong_signal(tp, CC_RTO, tp->snd_una, __LINE__);
out:
return (retval);
}
@@ -7068,6 +7071,20 @@
}
TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
rsm->r_in_tmap = 1;
+ /* Take off the must retransmit flag, if it's on */
+ if (rsm->r_flags & RACK_MUST_RXT) {
+ if (rack->r_must_retran)
+ rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
+ if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
+ /*
+ * We have retransmitted all we need. Clear
+ * any must retransmit flags.
+ */
+ rack->r_must_retran = 0;
+ rack->r_ctl.rc_out_at_rto = 0;
+ }
+ rsm->r_flags &= ~RACK_MUST_RXT;
+ }
if (rsm->r_flags & RACK_SACK_PASSED) {
/* We have retransmitted due to the SACK pass */
rsm->r_flags &= ~RACK_SACK_PASSED;
@@ -7827,7 +7844,7 @@
(!IN_FASTRECOVERY(tp->t_flags))) {
/* Segment was a TLP and our retrans matched */
if (rack->r_ctl.rc_tlp_cwnd_reduce) {
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
}
}
if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) {
@@ -9014,7 +9031,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (to->to_tsecr == rack_ts_to_msec(rsm->r_tim_lastsent[0])) {
/* The first transmit is what this ack is for */
- rack_cong_signal(tp, CC_RTO_ERR, th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th_ack, __LINE__);
}
}
left = th_ack - rsm->r_end;
@@ -9134,7 +9151,7 @@
orig_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at_erec;
tp->snd_recover = tp->snd_una;
- rack_log_to_prr(rack, 14, orig_cwnd);
+ rack_log_to_prr(rack, 14, orig_cwnd, __LINE__);
EXIT_RECOVERY(tp->t_flags);
}
rack->r_might_revert = 0;
@@ -9328,7 +9345,7 @@
sndcnt /= (long)rack->r_ctl.rc_prr_recovery_fs;
else {
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 9, 0);
+ rack_log_to_prr(rack, 9, 0, __LINE__);
sndcnt = 0;
}
sndcnt++;
@@ -9337,7 +9354,7 @@
else
sndcnt = 0;
rack->r_ctl.rc_prr_sndcnt = sndcnt;
- rack_log_to_prr(rack, 10, 0);
+ rack_log_to_prr(rack, 10, 0, __LINE__);
} else {
uint32_t limit;
@@ -9350,10 +9367,10 @@
limit += ctf_fixed_maxseg(tp);
if (tp->snd_ssthresh > pipe) {
rack->r_ctl.rc_prr_sndcnt = min((tp->snd_ssthresh - pipe), limit);
- rack_log_to_prr(rack, 11, 0);
+ rack_log_to_prr(rack, 11, 0, __LINE__);
} else {
rack->r_ctl.rc_prr_sndcnt = min(0, limit);
- rack_log_to_prr(rack, 12, 0);
+ rack_log_to_prr(rack, 12, 0, __LINE__);
}
}
}
@@ -9676,17 +9693,18 @@
tsused = tcp_get_usecs(NULL);
rsm = tcp_rack_output(tp, rack, tsused);
if ((!IN_FASTRECOVERY(tp->t_flags)) &&
- rsm) {
+ rsm &&
+ ((rsm->r_flags & RACK_MUST_RXT) == 0)) {
/* Enter recovery */
entered_recovery = 1;
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
/*
* When we enter recovery we need to assure we send
* one packet.
*/
if (rack->rack_no_prr == 0) {
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 8, 0);
+ rack_log_to_prr(rack, 8, 0, __LINE__);
}
rack->r_timer_override = 1;
rack->r_early = 0;
@@ -9728,6 +9746,19 @@
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
while (rsm && (rsm->r_dupack >= DUP_ACK_THRESHOLD)) {
rsm = TAILQ_NEXT(rsm, r_tnext);
+ if (rsm->r_flags & RACK_MUST_RXT) {
+ /*
+ * Sendmap entries that are already marked to be retransmitted
+ * do not need dupacks struck against them. Entries get marked
+ * for a number of reasons (rxt timeout with no sack, mtu change,
+ * or rwnd collapse); in each case we know we must retransmit
+ * them as soon as we can, so dupack counting is not needed.
+ *
+ * NOTE(review): rsm comes straight from TAILQ_NEXT() and may be
+ * NULL at the list tail; the enclosing test dereferences it.
+ */
+ continue;
+ }
}
if (rsm && (rsm->r_dupack < 0xff)) {
rsm->r_dupack++;
@@ -9746,7 +9777,7 @@
if (rack->r_ctl.rc_resend != NULL) {
if (!IN_FASTRECOVERY(rack->rc_tp->t_flags)) {
rack_cong_signal(rack->rc_tp, CC_NDUPACK,
- rack->rc_tp->snd_una);
+ rack->rc_tp->snd_una, __LINE__);
}
rack->r_wanted_output = 1;
rack->r_timer_override = 1;
@@ -10100,7 +10131,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, th->th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
}
if (acked) {
/* assure we are not backed off */
@@ -10310,14 +10341,27 @@
rack_un_collapse_window(struct tcp_rack *rack)
{
struct rack_sendmap *rsm;
+ int cnt = 0;;
+ rack->r_ctl.rc_out_at_rto = 0;
+ rack->r_ctl.rc_snd_max_at_rto = rack->rc_tp->snd_una;
RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) {
- if (rsm->r_flags & RACK_RWND_COLLAPSED)
+ if (rsm->r_flags & RACK_RWND_COLLAPSED) {
rsm->r_flags &= ~RACK_RWND_COLLAPSED;
+ rsm->r_flags |= RACK_MUST_RXT;
+ if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
+ rack->r_ctl.rc_snd_max_at_rto = rsm->r_end;
+ rack->r_ctl.rc_out_at_rto += (rsm->r_end - rsm->r_start);
+ }
+ cnt++;
+ }
else
break;
}
rack->rc_has_collapsed = 0;
+ if (cnt) {
+ rack->r_must_retran = 1;
+ }
}
static void
@@ -10442,7 +10486,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -10899,7 +10943,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -10933,7 +10977,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, th->th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
}
/*
* Recalculate the transmit timer / rtt.
@@ -13003,7 +13047,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -13382,7 +13426,7 @@
tp->t_rcvtime = ticks;
/* Now what about ECN? */
if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
- rack_cong_signal(tp, CC_ECN, ae->ack);
+ rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__);
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -13618,7 +13662,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, high_seq);
+ rack_cong_signal(tp, CC_RTO_ERR, high_seq, __LINE__);
}
/* Handle the data in the socket buffer */
KMOD_TCPSTAT_ADD(tcps_rcvackpack, 1);
@@ -14155,7 +14199,7 @@
* this to occur after we've validated the segment.
*/
if (tcp_ecn_input_segment(tp, thflags, iptos))
- rack_cong_signal(tp, CC_ECN, th->th_ack);
+ rack_cong_signal(tp, CC_ECN, th->th_ack, __LINE__);
/*
* If echoed timestamp is later than the current time, fall back to
@@ -14495,6 +14539,9 @@
}
/* ok lets look at this one */
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
+ if (rack->r_must_retran && rsm && (rsm->r_flags & RACK_MUST_RXT)) {
+ return (rsm);
+ }
if (rsm && ((rsm->r_flags & RACK_ACKED) == 0)) {
goto check_it;
}
@@ -15954,29 +16001,6 @@
slot = rack->r_ctl.rc_min_to;
}
rack_start_hpts_timer(rack, tp, cts, slot, len, 0);
- if (rack->r_must_retran) {
- rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
- if ((SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) ||
- ((rsm->r_flags & RACK_MUST_RXT) == 0)) {
- /*
- * We have retransmitted all we need. If
- * RACK_MUST_RXT is not set then we need to
- * not retransmit this guy.
- */
- rack->r_must_retran = 0;
- rack->r_ctl.rc_out_at_rto = 0;
- if ((rsm->r_flags & RACK_MUST_RXT) == 0) {
- /* Not one we should rxt */
- goto failed;
- } else {
- /* Clear the flag */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
- } else {
- /* Remove the flag */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
- }
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -16801,9 +16825,10 @@
} else if ((rsm = tcp_rack_output(tp, rack, cts)) != NULL) {
/* We have a retransmit that takes precedence */
if ((!IN_FASTRECOVERY(tp->t_flags)) &&
+ ((rsm->r_flags & RACK_MUST_RXT) == 0) &&
((tp->t_flags & TF_WASFRECOVERY) == 0)) {
/* Enter recovery if not induced by a time-out */
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
}
#ifdef INVARIANTS
if (SEQ_LT(rsm->r_start, tp->snd_una)) {
@@ -16876,7 +16901,6 @@
/* TSNH */
rack->r_must_retran = 0;
rack->r_ctl.rc_out_at_rto = 0;
- rack->r_must_retran = 0;
so = inp->inp_socket;
sb = &so->so_snd;
goto just_return_nolock;
@@ -16941,15 +16965,6 @@
if (ret == 0)
return (0);
}
- if (rsm && (rsm->r_flags & RACK_MUST_RXT)) {
- /*
- * Clear the flag in prep for the send
- * note that if we can't get an mbuf
- * and fail, we won't retransmit this
- * rsm but that should be ok (its rare).
- */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
so = inp->inp_socket;
sb = &so->so_snd;
if (do_a_prefetch == 0) {

File Metadata

Mime Type
text/plain
Expires
Mon, Nov 18, 3:50 PM (21 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14700246
Default Alt Text
D34573.diff (15 KB)

Event Timeline