Page MenuHomeFreeBSD

D28822.diff
No OneTemporary

D28822.diff

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -33,7 +33,7 @@
.\"
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\"
-.Dd June 29, 2023
+.Dd November 17, 2023
.Dt TCP 4
.Os
.Sh NAME
@@ -480,12 +480,6 @@
with ACK thinning or burst loss events, as chances to run out of the ACK clock
are reduced, preventing lengthy and performance reducing RTO based loss recovery
(default is true).
-.It Va do_prr_conservative
-While doing Proportional Rate Reduction, remain strictly in a packet conserving
-mode, sending only one new packet for each ACK received.
-Helpful when a misconfigured token bucket traffic policer causes persistent
-high losses leading to RTO, but reduces PRR effectiveness in more common settings
-(default is false).
.It Va do_tcpdrain
Flush packets in the
.Tn TCP
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -159,11 +159,6 @@
&VNET_NAME(drop_synfin), 0,
"Drop TCP packets with SYN+FIN set");
-VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_prr_conservative), 0,
- "Do conservative Proportional Rate Reduction");
-
VNET_DEFINE(int, tcp_do_prr) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_prr), 1,
@@ -1524,7 +1519,8 @@
int drop_hdrlen, int tlen, uint8_t iptos)
{
uint16_t thflags;
- int acked, ourfinisacked, needoutput = 0, sack_changed;
+ int acked, ourfinisacked, needoutput = 0;
+ sackstatus_t sack_changed;
int rstreason, todrop, win, incforsyn = 0;
uint32_t tiwin;
uint16_t nsegs;
@@ -1539,7 +1535,7 @@
thflags = tcp_get_flags(th);
tp->sackhint.last_sack_ack = 0;
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
NET_EPOCH_ASSERT();
@@ -2582,7 +2578,7 @@
*/
if (th->th_ack != tp->snd_una ||
(tcp_is_sack_recovery(tp, &to) &&
- !sack_changed))
+ (sack_changed == SACK_NOCHANGE)))
break;
else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
@@ -2591,8 +2587,9 @@
cc_ack_received(tp, th, nsegs,
CC_DUPACK);
if (V_tcp_do_prr &&
- IN_FASTRECOVERY(tp->t_flags)) {
- tcp_do_prr_ack(tp, th, &to);
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->t_flags & TF_SACK_PERMIT)) {
+ tcp_do_prr_ack(tp, th, &to, sack_changed);
} else if (tcp_is_sack_recovery(tp, &to) &&
IN_FASTRECOVERY(tp->t_flags)) {
int awnd;
@@ -2667,8 +2664,12 @@
* cc_cong_signal.
*/
if (tcp_is_sack_recovery(tp, &to)) {
+ /*
+ * Exclude Limited Transmit
+ * segments here
+ */
tp->sackhint.prr_delivered =
- tp->sackhint.sacked_bytes;
+ maxseg;
} else {
tp->sackhint.prr_delivered =
imin(tp->snd_max - tp->snd_una,
@@ -2771,7 +2772,7 @@
* counted as dupacks here.
*/
if (tcp_is_sack_recovery(tp, &to) &&
- sack_changed) {
+ (sack_changed != SACK_NOCHANGE)) {
tp->t_dupacks++;
/* limit overhead by setting maxseg last */
if (!IN_FASTRECOVERY(tp->t_flags) &&
@@ -2797,7 +2798,7 @@
if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- tcp_do_prr_ack(tp, th, &to);
+ tcp_do_prr_ack(tp, th, &to, sack_changed);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
} else
@@ -2811,7 +2812,11 @@
if (V_tcp_do_prr) {
tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
tp->snd_fack = th->th_ack;
- tcp_do_prr_ack(tp, th, &to);
+ /*
+ * During ECN cwnd reduction
+ * always use PRR-SSRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_CHANGE);
(void) tcp_output(tp);
}
} else
@@ -3934,7 +3939,7 @@
}
void
-tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
+tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, sackstatus_t sack_changed)
{
int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
int maxseg = tcp_maxseg(tp);
@@ -3974,7 +3979,17 @@
tp->snd_ssthresh, tp->sackhint.recover_fs) -
tp->sackhint.prr_out;
} else {
- if (V_tcp_do_prr_conservative || (del_data == 0))
+ /*
+ * PRR 6937bis heuristic:
+ * - A partial ACK without a SACK block beneath snd_recover
+ * indicates further loss.
+ * - A SACK scoreboard update adding a new hole indicates
+ * further loss, so be conservative and send at most one
+ * segment.
+ * - Prevent ACK splitting attacks by being conservative
+ * when no new data is acked.
+ */
+ if ((sack_changed == SACK_NEWLOSS) || (del_data == 0))
limit = tp->sackhint.prr_delivered -
tp->sackhint.prr_out;
else
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -544,15 +544,17 @@
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
- * Returns 1 if incoming ACK has previously unknown SACK information,
- * 0 otherwise.
+ * Returns SACK_NEWLOSS if incoming ACK indicates ongoing loss (hole split, new hole),
+ * SACK_CHANGE if incoming ACK has previously unknown SACK information,
+ * SACK_NOCHANGE otherwise.
*/
-int
+sackstatus_t
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
- int i, j, num_sack_blks, sack_changed;
+ int i, j, num_sack_blks;
+ sackstatus_t sack_changed;
int delivered_data, left_edge_delta;
tcp_seq loss_hiack = 0;
@@ -563,7 +565,7 @@
INP_WLOCK_ASSERT(tptoinpcb(tp));
num_sack_blks = 0;
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
delivered_data = 0;
left_edge_delta = 0;
/*
@@ -582,7 +584,7 @@
if (SEQ_LT(tp->snd_fack, th_ack)) {
delivered_data += th_ack - tp->snd_una;
tp->snd_fack = th_ack;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
}
/*
@@ -676,8 +678,8 @@
KASSERT(tp->sackhint.hole_bytes >= 0,
("sackhint hole bytes >= 0"));
tp->snd_fack = sblkp->end;
- sack_changed = 1;
sblkp--;
+ sack_changed = SACK_NEWLOSS;
} else {
/*
* Append a new SACK hole at the tail. If the
@@ -690,9 +692,9 @@
delivered_data += sblkp->end - sblkp->start;
tp->sackhint.hole_bytes += temp->end - temp->start;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
/* Go to the previous sack block. */
sblkp--;
+ sack_changed = SACK_CHANGE;
} else {
/*
* We failed to add a new hole based on the current
@@ -709,7 +711,12 @@
SEQ_LT(tp->snd_fack, sblkp->end)) {
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ /*
+ * While the scoreboard didn't change in
+ * size, we only ended up here because
+ * some SACK data had to be dismissed.
+ */
+ sack_changed = SACK_NEWLOSS;
}
}
}
@@ -717,7 +724,7 @@
/* fack is advanced. */
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
loss_hiack = tp->snd_fack;
@@ -769,7 +776,7 @@
(SEQ_MIN(cur->rxmit, cur->end) - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -809,6 +816,7 @@
*/
temp = tcp_sackhole_insert(tp, sblkp->end,
cur->end, cur);
+ sack_changed = SACK_NEWLOSS;
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
@@ -867,7 +875,7 @@
* DupAck for this. Also required
* for RFC6675 rescue retransmission.
*/
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
tp->sackhint.delivered_data = delivered_data;
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -504,6 +504,13 @@
struct tcphdr tt_t;
};
+/* SACK scoreboard update status, as returned by tcp_sack_doack(). */
+typedef enum {
+ SACK_NOCHANGE = 0, /* ACK carried no previously unknown SACK information */
+ SACK_CHANGE, /* ACK carried previously unknown SACK information */
+ SACK_NEWLOSS /* ACK indicates ongoing loss (hole split, new hole) */
+} sackstatus_t;
+
/* Enable TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN 0
#define TCP_TUNNELING_PORT_MAX 65535
@@ -1303,7 +1310,6 @@
#define V_tcp_do_lrd VNET(tcp_do_lrd)
#define V_tcp_do_prr VNET(tcp_do_prr)
-#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative)
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
#define V_drop_synfin VNET(drop_synfin)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
@@ -1483,7 +1489,8 @@
uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
-int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+sackstatus_t
+ tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
int tcp_dsack_block_exists(struct tcpcb *);
void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
@@ -1491,7 +1498,7 @@
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
-void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *);
+void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t);
void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);

File Metadata

Mime Type
text/plain
Expires
Sat, Jan 11, 8:04 PM (21 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15757321
Default Alt Text
D28822.diff (9 KB)

Event Timeline