Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102822855
D34573.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
15 KB
Referenced Files
None
Subscribers
None
D34573.diff
View Options
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -420,7 +420,7 @@
uint32_t tsused);
static void
rack_cong_signal(struct tcpcb *tp,
- uint32_t type, uint32_t ack);
+ uint32_t type, uint32_t ack, int );
static void rack_counter_destroy(void);
static int
rack_ctloutput(struct inpcb *inp, struct sockopt *sopt);
@@ -562,7 +562,6 @@
tp = rack->rc_tp;
if (tp->cc_algo == NULL) {
/* Tcb is leaving */
- printf("No cc algorithm?\n");
return;
}
rack->rc_pacing_cc_set = 1;
@@ -2120,6 +2119,7 @@
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
+ log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
log.u_bbr.lt_epoch = rack->rc_tp->t_rxtshift;
log.u_bbr.lost = rack_rto_min;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
@@ -2510,6 +2510,7 @@
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
log.u_bbr.delivered = rack->r_ctl.rc_snd_max_at_rto;
log.u_bbr.pacing_gain = rack->r_must_retran;
+ log.u_bbr.cwnd_gain = rack->rc_has_collapsed;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -2616,7 +2617,7 @@
}
static void
-rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd)
+rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd, int line)
{
if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) {
union tcp_log_stackspecific log;
@@ -2632,6 +2633,7 @@
log.u_bbr.flex4 = rack->r_ctl.rc_prr_delivered;
log.u_bbr.flex5 = rack->r_ctl.rc_sacked;
log.u_bbr.flex6 = rack->r_ctl.rc_holes_rxt;
+ log.u_bbr.flex7 = line;
log.u_bbr.flex8 = frm;
log.u_bbr.pkts_out = orig_cwnd;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
@@ -4702,9 +4704,9 @@
rack->r_ctl.rc_prr_sndcnt);
}
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 1, 0);
+ rack_log_to_prr(rack, 1, 0, __LINE__);
}
- rack_log_to_prr(rack, 14, orig_cwnd);
+ rack_log_to_prr(rack, 14, orig_cwnd, __LINE__);
tp->snd_recover = tp->snd_una;
if (rack->r_ctl.dsack_persist) {
rack->r_ctl.dsack_persist--;
@@ -4717,7 +4719,7 @@
}
static void
-rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack)
+rack_cong_signal(struct tcpcb *tp, uint32_t type, uint32_t ack, int line)
{
struct tcp_rack *rack;
uint32_t ssthresh_enter, cwnd_enter, in_rec_at_entry, orig_cwnd;
@@ -4742,7 +4744,7 @@
rack->r_ctl.rc_prr_out = 0;
if (rack->rack_no_prr == 0) {
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 2, in_rec_at_entry);
+ rack_log_to_prr(rack, 2, in_rec_at_entry, line);
}
rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una;
tp->snd_recover = tp->snd_max;
@@ -4772,7 +4774,7 @@
ctf_fixed_maxseg(tp)) * ctf_fixed_maxseg(tp);
orig_cwnd = tp->snd_cwnd;
tp->snd_cwnd = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 16, orig_cwnd);
+ rack_log_to_prr(rack, 16, orig_cwnd, line);
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
break;
@@ -4800,7 +4802,7 @@
CC_ALGO(tp)->cong_signal(tp->ccv, type);
}
if ((in_rec_at_entry == 0) && IN_RECOVERY(tp->t_flags)) {
- rack_log_to_prr(rack, 15, cwnd_enter);
+ rack_log_to_prr(rack, 15, cwnd_enter, line);
rack->r_ctl.dsack_byte_cnt = 0;
rack->r_ctl.retran_during_recovery = 0;
rack->r_ctl.rc_cwnd_at_erec = cwnd_enter;
@@ -5105,6 +5107,7 @@
if (rsm == NULL)
return (NULL);
+
if (rsm->r_flags & RACK_ACKED) {
rsm = rack_find_lowest_rsm(rack);
if (rsm == NULL)
@@ -5120,7 +5123,7 @@
return (NULL);
}
/* Ok if we reach here we are over-due and this guy can be sent */
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
return (rsm);
}
@@ -5707,7 +5710,7 @@
* real pacing. And the tlp or rxt is smaller
* than the pacing calculation. Lets not
* pace that long since we know the calculation
- * so far is not accurate.
+ * so far is not accurate.
*/
slot = hpts_timeout;
}
@@ -6069,7 +6072,7 @@
so = tp->t_inpcb->inp_socket;
avail = sbavail(&so->so_snd);
out = tp->snd_max - tp->snd_una;
- if (out > tp->snd_wnd) {
+ if ((out > tp->snd_wnd) || rack->rc_has_collapsed) {
/* special case, we need a retransmission */
collapsed_win = 1;
goto need_retran;
@@ -6123,7 +6126,7 @@
if (out + amm <= tp->snd_wnd) {
rack->r_ctl.rc_prr_sndcnt = amm;
rack->r_ctl.rc_tlp_new_data = amm;
- rack_log_to_prr(rack, 4, 0);
+ rack_log_to_prr(rack, 4, 0, __LINE__);
}
} else
goto need_retran;
@@ -6467,7 +6470,7 @@
if (rack->r_ctl.rc_resend != NULL)
rack->r_ctl.rc_resend->r_flags |= RACK_TO_REXT;
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 6, 0);
+ rack_log_to_prr(rack, 6, 0, __LINE__);
rack->r_timer_override = 1;
if ((((tp->t_flags & TF_SACK_PERMIT) == 0)
#ifdef NETFLIX_EXP_DETECTION
@@ -6835,7 +6838,7 @@
tp->snd_recover = tp->snd_max;
tp->t_flags |= TF_ACKNOW;
tp->t_rtttime = 0;
- rack_cong_signal(tp, CC_RTO, tp->snd_una);
+ rack_cong_signal(tp, CC_RTO, tp->snd_una, __LINE__);
out:
return (retval);
}
@@ -7068,6 +7071,20 @@
}
TAILQ_INSERT_TAIL(&rack->r_ctl.rc_tmap, rsm, r_tnext);
rsm->r_in_tmap = 1;
+ /* Take off the must retransmit flag, if its on */
+ if (rsm->r_flags & RACK_MUST_RXT) {
+ if (rack->r_must_retran)
+ rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
+ if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
+ /*
+ * We have retransmitted all we need. Clear
+ * any must retransmit flags.
+ */
+ rack->r_must_retran = 0;
+ rack->r_ctl.rc_out_at_rto = 0;
+ }
+ rsm->r_flags &= ~RACK_MUST_RXT;
+ }
if (rsm->r_flags & RACK_SACK_PASSED) {
/* We have retransmitted due to the SACK pass */
rsm->r_flags &= ~RACK_SACK_PASSED;
@@ -7827,7 +7844,7 @@
(!IN_FASTRECOVERY(tp->t_flags))) {
/* Segment was a TLP and our retrans matched */
if (rack->r_ctl.rc_tlp_cwnd_reduce) {
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
}
}
if (SEQ_LT(rack->r_ctl.rc_rack_tmit_time, (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)])) {
@@ -9014,7 +9031,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (to->to_tsecr == rack_ts_to_msec(rsm->r_tim_lastsent[0])) {
/* The first transmit is what this ack is for */
- rack_cong_signal(tp, CC_RTO_ERR, th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th_ack, __LINE__);
}
}
left = th_ack - rsm->r_end;
@@ -9134,7 +9151,7 @@
orig_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = rack->r_ctl.rc_ssthresh_at_erec;
tp->snd_recover = tp->snd_una;
- rack_log_to_prr(rack, 14, orig_cwnd);
+ rack_log_to_prr(rack, 14, orig_cwnd, __LINE__);
EXIT_RECOVERY(tp->t_flags);
}
rack->r_might_revert = 0;
@@ -9328,7 +9345,7 @@
sndcnt /= (long)rack->r_ctl.rc_prr_recovery_fs;
else {
rack->r_ctl.rc_prr_sndcnt = 0;
- rack_log_to_prr(rack, 9, 0);
+ rack_log_to_prr(rack, 9, 0, __LINE__);
sndcnt = 0;
}
sndcnt++;
@@ -9337,7 +9354,7 @@
else
sndcnt = 0;
rack->r_ctl.rc_prr_sndcnt = sndcnt;
- rack_log_to_prr(rack, 10, 0);
+ rack_log_to_prr(rack, 10, 0, __LINE__);
} else {
uint32_t limit;
@@ -9350,10 +9367,10 @@
limit += ctf_fixed_maxseg(tp);
if (tp->snd_ssthresh > pipe) {
rack->r_ctl.rc_prr_sndcnt = min((tp->snd_ssthresh - pipe), limit);
- rack_log_to_prr(rack, 11, 0);
+ rack_log_to_prr(rack, 11, 0, __LINE__);
} else {
rack->r_ctl.rc_prr_sndcnt = min(0, limit);
- rack_log_to_prr(rack, 12, 0);
+ rack_log_to_prr(rack, 12, 0, __LINE__);
}
}
}
@@ -9676,17 +9693,18 @@
tsused = tcp_get_usecs(NULL);
rsm = tcp_rack_output(tp, rack, tsused);
if ((!IN_FASTRECOVERY(tp->t_flags)) &&
- rsm) {
+ rsm &&
+ ((rsm->r_flags & RACK_MUST_RXT) == 0)) {
/* Enter recovery */
entered_recovery = 1;
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
/*
* When we enter recovery we need to assure we send
* one packet.
*/
if (rack->rack_no_prr == 0) {
rack->r_ctl.rc_prr_sndcnt = ctf_fixed_maxseg(tp);
- rack_log_to_prr(rack, 8, 0);
+ rack_log_to_prr(rack, 8, 0, __LINE__);
}
rack->r_timer_override = 1;
rack->r_early = 0;
@@ -9728,6 +9746,19 @@
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
while (rsm && (rsm->r_dupack >= DUP_ACK_THRESHOLD)) {
rsm = TAILQ_NEXT(rsm, r_tnext);
+ if (rsm->r_flags & RACK_MUST_RXT) {
+ /* Sendmap entries that are marked to
+ * be retransmitted do not need dupack's
+ * struck. We get these marks for a number
+ * of reasons (rxt timeout with no sack,
+ * mtu change, or rwnd collapses). When
+ * these events occur, we know we must retransmit
+ * them and mark the sendmap entries. Dupack counting
+ * is not needed since we are already set to retransmit
+ * it as soon as we can.
+ */
+ continue;
+ }
}
if (rsm && (rsm->r_dupack < 0xff)) {
rsm->r_dupack++;
@@ -9746,7 +9777,7 @@
if (rack->r_ctl.rc_resend != NULL) {
if (!IN_FASTRECOVERY(rack->rc_tp->t_flags)) {
rack_cong_signal(rack->rc_tp, CC_NDUPACK,
- rack->rc_tp->snd_una);
+ rack->rc_tp->snd_una, __LINE__);
}
rack->r_wanted_output = 1;
rack->r_timer_override = 1;
@@ -10100,7 +10131,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, th->th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
}
if (acked) {
/* assure we are not backed off */
@@ -10310,14 +10341,27 @@
rack_un_collapse_window(struct tcp_rack *rack)
{
struct rack_sendmap *rsm;
+ int cnt = 0;;
+ rack->r_ctl.rc_out_at_rto = 0;
+ rack->r_ctl.rc_snd_max_at_rto = rack->rc_tp->snd_una;
RB_FOREACH_REVERSE(rsm, rack_rb_tree_head, &rack->r_ctl.rc_mtree) {
- if (rsm->r_flags & RACK_RWND_COLLAPSED)
+ if (rsm->r_flags & RACK_RWND_COLLAPSED) {
rsm->r_flags &= ~RACK_RWND_COLLAPSED;
+ rsm->r_flags |= RACK_MUST_RXT;
+ if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) {
+ rack->r_ctl.rc_snd_max_at_rto = rsm->r_end;
+ rack->r_ctl.rc_out_at_rto += (rsm->r_end - rsm->r_start);
+ }
+ cnt++;
+ }
else
break;
}
rack->rc_has_collapsed = 0;
+ if (cnt) {
+ rack->r_must_retran = 1;
+ }
}
static void
@@ -10442,7 +10486,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -10899,7 +10943,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -10933,7 +10977,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, th->th_ack);
+ rack_cong_signal(tp, CC_RTO_ERR, th->th_ack, __LINE__);
}
/*
* Recalculate the transmit timer / rtt.
@@ -13003,7 +13047,7 @@
if ((rack->rc_in_persist == 0) &&
(tp->snd_wnd < min((rack->r_ctl.rc_high_rwnd/2), rack->r_ctl.rc_pace_min_segs)) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->snd_max == tp->snd_una) &&
+ ((tp->snd_max == tp->snd_una) || rack->rc_has_collapsed) &&
sbavail(&tp->t_inpcb->inp_socket->so_snd) &&
(sbavail(&tp->t_inpcb->inp_socket->so_snd) > tp->snd_wnd)) {
/*
@@ -13382,7 +13426,7 @@
tp->t_rcvtime = ticks;
/* Now what about ECN? */
if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
- rack_cong_signal(tp, CC_ECN, ae->ack);
+ rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__);
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1);
@@ -13618,7 +13662,7 @@
tp->t_flags &= ~TF_PREVVALID;
if (tp->t_rxtshift == 1 &&
(int)(ticks - tp->t_badrxtwin) < 0)
- rack_cong_signal(tp, CC_RTO_ERR, high_seq);
+ rack_cong_signal(tp, CC_RTO_ERR, high_seq, __LINE__);
}
/* Handle the data in the socket buffer */
KMOD_TCPSTAT_ADD(tcps_rcvackpack, 1);
@@ -14155,7 +14199,7 @@
* this to occur after we've validated the segment.
*/
if (tcp_ecn_input_segment(tp, thflags, iptos))
- rack_cong_signal(tp, CC_ECN, th->th_ack);
+ rack_cong_signal(tp, CC_ECN, th->th_ack, __LINE__);
/*
* If echoed timestamp is later than the current time, fall back to
@@ -14495,6 +14539,9 @@
}
/* ok lets look at this one */
rsm = TAILQ_FIRST(&rack->r_ctl.rc_tmap);
+ if (rack->r_must_retran && rsm && (rsm->r_flags & RACK_MUST_RXT)) {
+ return (rsm);
+ }
if (rsm && ((rsm->r_flags & RACK_ACKED) == 0)) {
goto check_it;
}
@@ -15954,29 +16001,6 @@
slot = rack->r_ctl.rc_min_to;
}
rack_start_hpts_timer(rack, tp, cts, slot, len, 0);
- if (rack->r_must_retran) {
- rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start);
- if ((SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) ||
- ((rsm->r_flags & RACK_MUST_RXT) == 0)) {
- /*
- * We have retransmitted all we need. If
- * RACK_MUST_RXT is not set then we need to
- * not retransmit this guy.
- */
- rack->r_must_retran = 0;
- rack->r_ctl.rc_out_at_rto = 0;
- if ((rsm->r_flags & RACK_MUST_RXT) == 0) {
- /* Not one we should rxt */
- goto failed;
- } else {
- /* Clear the flag */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
- } else {
- /* Remove the flag */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
- }
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -16801,9 +16825,10 @@
} else if ((rsm = tcp_rack_output(tp, rack, cts)) != NULL) {
/* We have a retransmit that takes precedence */
if ((!IN_FASTRECOVERY(tp->t_flags)) &&
+ ((rsm->r_flags & RACK_MUST_RXT) == 0) &&
((tp->t_flags & TF_WASFRECOVERY) == 0)) {
/* Enter recovery if not induced by a time-out */
- rack_cong_signal(tp, CC_NDUPACK, tp->snd_una);
+ rack_cong_signal(tp, CC_NDUPACK, tp->snd_una, __LINE__);
}
#ifdef INVARIANTS
if (SEQ_LT(rsm->r_start, tp->snd_una)) {
@@ -16876,7 +16901,6 @@
/* TSNH */
rack->r_must_retran = 0;
rack->r_ctl.rc_out_at_rto = 0;
- rack->r_must_retran = 0;
so = inp->inp_socket;
sb = &so->so_snd;
goto just_return_nolock;
@@ -16941,15 +16965,6 @@
if (ret == 0)
return (0);
}
- if (rsm && (rsm->r_flags & RACK_MUST_RXT)) {
- /*
- * Clear the flag in prep for the send
- * note that if we can't get an mbuf
- * and fail, we won't retransmit this
- * rsm but that should be ok (its rare).
- */
- rsm->r_flags &= ~RACK_MUST_RXT;
- }
so = inp->inp_socket;
sb = &so->so_snd;
if (do_a_prefetch == 0) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Nov 18, 3:50 PM (21 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14700246
Default Alt Text
D34573.diff (15 KB)
Attached To
Mode
D34573: rack may end up with a stuck connection if the rwnd is collapsed on sent data.
Attached
Detach File
Event Timeline
Log In to Comment