D45410.diff
No OneTemporary
Actions

Size

52 KB

Referenced Files

None

Subscribers

None

D45410.diff
View Options

	diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
	--- a/sys/netinet/tcp.h
	+++ b/sys/netinet/tcp.h
	@@ -337,8 +337,7 @@
	#define TCP_RACK_PACING_DIVISOR 1146 /* Pacing divisor given to rate-limit code for burst sizing */
	#define TCP_RACK_PACE_MIN_SEG 1147 /* Pacing min seg size rack will use */
	#define TCP_RACK_DGP_IN_REC 1148 /* Do we use full DGP in recovery? */
	-#define TCP_POLICER_DETECT 1149 /* Do we apply a thresholds to rack to detect and compensate for policers? */
	-#define TCP_RXT_CLAMP TCP_POLICER_DETECT
	+/* #define TCP_POLICER_DETECT 1149 not used */
	#define TCP_HYBRID_PACING 1150 /* Hybrid pacing enablement */
	#define TCP_PACING_DND 1151 /* When pacing with rr_config=3 can sacks disturb us */
	#define TCP_SS_EEXIT 1152 /* Do we do early exit from slowtart if no b/w growth */
	@@ -348,7 +347,7 @@
	#define TCP_REC_IS_DYN 1156 /* Do we allow timely to change recovery multiplier? */
	#define TCP_SIDECHAN_DIS 1157 /* Disable/enable the side-channel */
	#define TCP_FILLCW_RATE_CAP 1158 /* Set a cap for DGP's fillcw */
	-#define TCP_POLICER_MSS 1159 /* Policer MSS requirement */
	+/* #define TCP_POLICER_MSS 1159 not used */
	#define TCP_STACK_SPEC_INFO 1160 /* Get stack specific information (if present) */
	#define RACK_CSPR_IS_FCC 1161
	#define TCP_GP_USE_LTBW 1162 /* how we use lt_bw 0=not, 1=min, 2=max */
	diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h
	--- a/sys/netinet/tcp_log_buf.h
	+++ b/sys/netinet/tcp_log_buf.h
	@@ -201,14 +201,14 @@
	TCP_LOG_OUT, /* Transmit (without other event) 2 */
	TCP_LOG_RTO, /* Retransmit timeout 3 */
	TCP_LOG_SB_WAKE, /* Awaken socket buffer 4 */
	- TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */
	+ TCP_UNUSED_5, /* Detected bad retransmission -- now not used 5 */
	TCP_LOG_PRR, /* Doing PRR 6 */
	- TCP_LOG_REORDER, /* Detected reorder 7 */
	+ TCP_UNUSED_7, /* Detected reorder -- now not used 7 */
	TCP_LOG_HPTS, /* Hpts sending a packet 8 */
	BBR_LOG_BBRUPD, /* We updated BBR info 9 */
	BBR_LOG_BBRSND, /* We did a slot calculation and sending is done 10 */
	BBR_LOG_ACKCLEAR, /* A ack clears all outstanding 11 */
	- BBR_LOG_INQUEUE, /* The tcb had a packet input to it 12 */
	+ TCP_UNUSED_12, /* The tcb had a packet input to it -- now not used 12 */
	BBR_LOG_TIMERSTAR, /* Start a timer 13 */
	BBR_LOG_TIMERCANC, /* Cancel a timer 14 */
	BBR_LOG_ENTREC, /* Entered recovery 15 */
	@@ -245,7 +245,7 @@
	BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
	TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */
	BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */
	- BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */
	+ TCP_UNUSED_49, /* SRTT gaining -- now not used 49 */
	TCP_LOG_REASS, /* Reassembly buffer logging 50 */
	TCP_HDWR_PACE_SIZE, /* TCP pacing size set (rl and rack uses this) 51 */
	BBR_LOG_HDWR_PACE, /* TCP Hardware pacing log 52 */
	@@ -253,9 +253,9 @@
	TCP_LOG_CONNEND, /* End of connection 54 */
	TCP_LOG_LRO, /* LRO entry 55 */
	TCP_SACK_FILTER_RES, /* Results of SACK Filter 56 */
	- TCP_SAD_DETECT, /* Sack Attack Detection 57 */
	+ TCP_UNUSED_57, /* Sack Attack Detection -- now not used 57 */
	TCP_TIMELY_WORK, /* Logs regarding Timely CC tweaks 58 */
	- TCP_LOG_USER_EVENT, /* User space event data 59 */
	+ TCP_UNUSED_59, /* User space event data -- now not used 59 */
	TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */
	TCP_LOG_REQ_T, /* logging of request tracking 61 */
	TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */
	@@ -267,7 +267,7 @@
	TCP_RACK_TP_TRIGGERED, /* A rack tracepoint is triggered 68 */
	TCP_HYBRID_PACING_LOG, /* Hybrid pacing log 69 */
	TCP_LOG_PRU, /* TCP protocol user request 70 */
	- TCP_POLICER_DET, /* TCP Policer detectionn 71 */
	+ TCP_UNUSED_71, /* old TCP Policer detection, not used 71 */
	TCP_PCM_MEASURE, /* TCP Path Capacity Measurement 72 */
	TCP_LOG_END /* End (keep at end) 73 */
	};
	diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
	--- a/sys/netinet/tcp_stacks/rack.c
	+++ b/sys/netinet/tcp_stacks/rack.c
	@@ -193,17 +193,9 @@
	static int32_t rack_reorder_thresh = 2;
	static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000
	* - 60 seconds */
	-static uint16_t rack_policer_rxt_thresh= 0; /* 499 = 49.9%, 0 is off */
	-static uint8_t rack_policer_avg_thresh = 0; /* 3.2 */
	-static uint8_t rack_policer_med_thresh = 0; /* 1 - 16 */
	-static uint16_t rack_policer_bucket_reserve = 20; /* How much % is reserved in the bucket */
	-static uint64_t rack_pol_min_bw = 125000; /* 1mbps in Bytes per sec */
	-static uint32_t rack_policer_data_thresh = 64000; /* 64,000 bytes must be sent before we engage */
	-static uint32_t rack_policing_do_bw_comp = 1;
	static uint32_t rack_pcm_every_n_rounds = 100;
	static uint32_t rack_pcm_blast = 0;
	static uint32_t rack_pcm_is_enabled = 1;
	-static uint8_t rack_req_del_mss = 18; /* How many segments need to be sent in a recovery episode to do policer_detection */
	static uint8_t rack_ssthresh_rest_rto_rec = 0; /* Do we restore ssthresh when we have rec -> rto -> rec */

	static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round has "gaining" */
	@@ -392,7 +384,6 @@
	counter_u64_t rack_tlp_retran_bytes;
	counter_u64_t rack_to_tot;
	counter_u64_t rack_hot_alloc;
	-counter_u64_t tcp_policer_detected;
	counter_u64_t rack_to_alloc;
	counter_u64_t rack_to_alloc_hard;
	counter_u64_t rack_to_alloc_emerg;
	@@ -558,9 +549,6 @@
	struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
	int32_t tlen, uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos);

	-static void
	-rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz);
	-
	static int
	rack_do_closing(struct mbuf *m, struct tcphdr *th,
	struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
	@@ -898,7 +886,6 @@
	struct sysctl_oid *rack_measure;
	struct sysctl_oid *rack_probertt;
	struct sysctl_oid *rack_hw_pacing;
	- struct sysctl_oid *rack_policing;

	rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
	SYSCTL_CHILDREN(rack_sysctl_root),
	@@ -1551,53 +1538,6 @@
	OID_AUTO, "hystartplusplus", CTLFLAG_RW,
	&rack_do_hystart, 0,
	"Should RACK enable HyStart++ on connections?");
	- /* Policer detection */
	- rack_policing = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_sysctl_root),
	- OID_AUTO,
	- "policing",
	- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
	- "policer detection");
	- SYSCTL_ADD_U16(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "rxt_thresh", CTLFLAG_RW,
	- &rack_policer_rxt_thresh, 0,
	- "Percentage of retransmits we need to be a possible policer (499 = 49.9 percent)");
	- SYSCTL_ADD_U8(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "avg_thresh", CTLFLAG_RW,
	- &rack_policer_avg_thresh, 0,
	- "What threshold of average retransmits needed to recover a lost packet (1 - 169 aka 21 = 2.1)?");
	- SYSCTL_ADD_U8(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "med_thresh", CTLFLAG_RW,
	- &rack_policer_med_thresh, 0,
	- "What threshold of Median retransmits needed to recover a lost packet (1 - 16)?");
	- SYSCTL_ADD_U32(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "data_thresh", CTLFLAG_RW,
	- &rack_policer_data_thresh, 64000,
	- "How many bytes must have gotten through before we can start doing policer detection?");
	- SYSCTL_ADD_U32(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "bwcomp", CTLFLAG_RW,
	- &rack_policing_do_bw_comp, 1,
	- "Do we raise up low b/w so that at least pace_max_seg can be sent in the srtt?");
	- SYSCTL_ADD_U8(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "recmss", CTLFLAG_RW,
	- &rack_req_del_mss, 18,
	- "How many MSS must be delivered during recovery to engage policer detection?");
	- SYSCTL_ADD_U16(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "res_div", CTLFLAG_RW,
	- &rack_policer_bucket_reserve, 20,
	- "What percentage is reserved in the policer bucket?");
	- SYSCTL_ADD_U64(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_policing),
	- OID_AUTO, "min_comp_bw", CTLFLAG_RW,
	- &rack_pol_min_bw, 125000,
	- "Do we have a min b/w for b/w compensation (0 = no)?");
	/* Misc rack controls */
	rack_misc = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
	SYSCTL_CHILDREN(rack_sysctl_root),
	@@ -1880,13 +1820,6 @@
	OID_AUTO, "alloc_hot", CTLFLAG_RD,
	&rack_hot_alloc,
	"Total allocations from the top of our list");
	- tcp_policer_detected = counter_u64_alloc(M_WAITOK);
	- SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
	- SYSCTL_CHILDREN(rack_counters),
	- OID_AUTO, "policer_detected", CTLFLAG_RD,
	- &tcp_policer_detected,
	- "Total policer_detections");
	-
	rack_to_alloc = counter_u64_alloc(M_WAITOK);
	SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx,
	SYSCTL_CHILDREN(rack_counters),
	@@ -3429,7 +3362,6 @@
	counter_u64_free(rack_saw_enobuf_hw);
	counter_u64_free(rack_saw_enetunreach);
	counter_u64_free(rack_hot_alloc);
	- counter_u64_free(tcp_policer_detected);
	counter_u64_free(rack_to_alloc);
	counter_u64_free(rack_to_alloc_hard);
	counter_u64_free(rack_to_alloc_emerg);
	@@ -5702,459 +5634,12 @@
	rack->r_wanted_output = 1;
	}

	-static inline uint64_t
	-rack_get_rxt_per(uint64_t snds, uint64_t rxts)
	-{
	- uint64_t rxt_per;
	-
	- if (snds > 0) {
	- rxt_per = rxts * 1000;
	- rxt_per /= snds;
	- } else {
	- /* This is an unlikely path */
	- if (rxts) {
	- /* Its the max it was all re-transmits */
	- rxt_per = 0xffffffffffffffff;
	- } else {
	- rxt_per = 0;
	- }
	- }
	- return (rxt_per);
	-}
	-
	-static void
	-policer_detection_log(struct tcp_rack *rack, uint32_t flex1, uint32_t flex2, uint32_t flex3, uint32_t flex4, uint8_t flex8)
	-{
	- if (tcp_bblogging_on(rack->rc_tp)) {
	- union tcp_log_stackspecific log;
	- struct timeval tv;
	-
	- memset(&log.u_bbr, 0, sizeof(log.u_bbr));
	- log.u_bbr.timeStamp = tcp_get_usecs(&tv);
	- log.u_bbr.flex1 = flex1;
	- log.u_bbr.flex2 = flex2;
	- log.u_bbr.flex3 = flex3;
	- log.u_bbr.flex4 = flex4;
	- log.u_bbr.flex5 = rack->r_ctl.current_policer_bucket;
	- log.u_bbr.flex6 = rack->r_ctl.policer_bucket_size;
	- log.u_bbr.flex7 = 0;
	- log.u_bbr.flex8 = flex8;
	- log.u_bbr.bw_inuse = rack->r_ctl.policer_bw;
	- log.u_bbr.applimited = rack->r_ctl.current_round;
	- log.u_bbr.epoch = rack->r_ctl.policer_max_seg;
	- log.u_bbr.delivered = (uint32_t)rack->r_ctl.bytes_acked_in_recovery;
	- log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
	- log.u_bbr.delRate = rack->rc_tp->t_snd_rxt_bytes;
	- log.u_bbr.rttProp = rack->r_ctl.gp_bw;
	- log.u_bbr.bbr_state = rack->rc_policer_detected;
	- log.u_bbr.bbr_substate = 0;
	- log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
	- log.u_bbr.use_lt_bw = rack->policer_detect_on;
	- log.u_bbr.lt_epoch = 0;
	- log.u_bbr.pkts_out = 0;
	- tcp_log_event(rack->rc_tp, NULL, NULL, NULL, TCP_POLICER_DET, 0,
	- 0, &log, false, NULL, NULL, 0, &tv);
	- }
	-
	-}
	-
	-static void
	-policer_detection(struct tcpcb *tp, struct tcp_rack *rack, int post_recovery)
	-{
	- /*
	- * Rack excess rxt accounting is turned on. If we
	- * are above a threshold of rxt's in at least N
	- * rounds, then back off the cwnd and ssthresh
	- * to fit into the long-term b/w.
	- */
	-
	- uint32_t pkts, mid, med, alt_med, avg, segsiz, tot_retran_pkt_count = 0;
	- uint32_t cnt_of_mape_rxt = 0;
	- uint64_t snds, rxts, rxt_per, tim, del, del_bw;
	- int i;
	- struct timeval tv;
	-
	-
	- /*
	- * First is there enough packets delivered during recovery to make
	- * a determiniation of b/w?
	- */
	- segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
	- if ((rack->rc_policer_detected == 0) &&
	- (rack->r_ctl.policer_del_mss > 0) &&
	- ((uint32_t)rack->r_ctl.policer_del_mss > ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz))) {
	- /*
	- * Not enough data sent in recovery for initial detection. Once
	- * we have deteced a policer we allow less than the threshold (polcer_del_mss)
	- * amount of data in a recovery to let us fall through and double check
	- * our policer settings and possibly expand or collapse the bucket size and
	- * the polcier b/w.
	- *
	- * Once you are declared to be policed. this block of code cannot be
	- * reached, instead blocks further down will re-check the policer detection
	- * triggers and possibly reset the measurements if somehow we have let the
	- * policer bucket size grow too large.
	- */
	- if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
	- policer_detection_log(rack, rack->r_ctl.policer_del_mss,
	- ((rack->r_ctl.bytes_acked_in_recovery + segsiz - 1)/segsiz),
	- rack->r_ctl.bytes_acked_in_recovery, segsiz, 18);
	- }
	- return;
	- }
	- tcp_get_usecs(&tv);
	- tim = tcp_tv_to_lusectick(&tv) - rack->r_ctl.time_entered_recovery;
	- del = rack->r_ctl.bytes_acked_in_recovery;
	- if (tim > 0)
	- del_bw = (del * (uint64_t)1000000) / tim;
	- else
	- del_bw = 0;
	- /* B/W compensation? */
	-
	- if (rack->r_ctl.pol_bw_comp && ((rack->r_ctl.policer_bw > 0) ||
	- (del_bw > 0))) {
	- /*
	- * Sanity check now that the data is in. How long does it
	- * take for us to pace out two of our policer_max_seg's?
	- *
	- * If it is longer than the RTT then we are set
	- * too slow, maybe because of not enough data
	- * sent during recovery.
	- */
	- uint64_t lentime, res, srtt, max_delbw, alt_bw;
	-
	- srtt = (uint64_t)rack_grab_rtt(tp, rack);
	- if ((tp->t_srtt > 0) && (srtt > tp->t_srtt))
	- srtt = tp->t_srtt;
	- lentime = rack->r_ctl.policer_max_seg * (uint64_t)HPTS_USEC_IN_SEC * 2;
	- if (del_bw > rack->r_ctl.policer_bw) {
	- max_delbw = del_bw;
	- } else {
	- max_delbw = rack->r_ctl.policer_bw;
	- }
	- res = lentime / max_delbw;
	- if ((srtt > 0) && (res > srtt)) {
	- /*
	- * At this rate we can not get two policer_maxsegs
	- * out before the ack arrives back.
	- *
	- * Lets at least get it raised up so that
	- * we can be a bit faster than that if possible.
	- */
	- lentime = (rack->r_ctl.policer_max_seg * 2);
	- tim = srtt;
	- alt_bw = (lentime * (uint64_t)HPTS_USEC_IN_SEC) / tim;
	- if (alt_bw > max_delbw) {
	- uint64_t cap_alt_bw;
	-
	- cap_alt_bw = (max_delbw + (max_delbw * rack->r_ctl.pol_bw_comp));
	- if ((rack_pol_min_bw > 0) && (cap_alt_bw < rack_pol_min_bw)) {
	- /* We place a min on the cap which defaults to 1Mbps */
	- cap_alt_bw = rack_pol_min_bw;
	- }
	- if (alt_bw <= cap_alt_bw) {
	- /* It should be */
	- del_bw = alt_bw;
	- policer_detection_log(rack,
	- (uint32_t)tim,
	- rack->r_ctl.policer_max_seg,
	- 0,
	- 0,
	- 16);
	- } else {
	- /*
	- * This is an odd case where likely the RTT is very very
	- * low. And yet it is still being policed. We don't want
	- * to get more than (rack_policing_do_bw_comp+1) x del-rate
	- * where del-rate is what we got in recovery for either the
	- * first Policer Detection(PD) or this PD we are on now.
	- */
	- del_bw = cap_alt_bw;
	- policer_detection_log(rack,
	- (uint32_t)tim,
	- rack->r_ctl.policer_max_seg,
	- (uint32_t)max_delbw,
	- (rack->r_ctl.pol_bw_comp + 1),
	- 16);
	- }
	- }
	- }
	- }
	- snds = tp->t_sndbytes - rack->r_ctl.last_policer_sndbytes;
	- rxts = tp->t_snd_rxt_bytes - rack->r_ctl.last_policer_snd_rxt_bytes;
	- rxt_per = rack_get_rxt_per(snds, rxts);
	- /* Figure up the average and median */
	- for(i = 0; i < RETRAN_CNT_SIZE; i++) {
	- if (rack->r_ctl.rc_cnt_of_retran[i] > 0) {
	- tot_retran_pkt_count += (i + 1) * rack->r_ctl.rc_cnt_of_retran[i];
	- cnt_of_mape_rxt += rack->r_ctl.rc_cnt_of_retran[i];
	- }
	- }
	- if (cnt_of_mape_rxt)
	- avg = (tot_retran_pkt_count * 10)/cnt_of_mape_rxt;
	- else
	- avg = 0;
	- alt_med = med = 0;
	- mid = tot_retran_pkt_count/2;
	- for(i = 0; i < RETRAN_CNT_SIZE; i++) {
	- pkts = (i + 1) * rack->r_ctl.rc_cnt_of_retran[i];
	- if (mid > pkts) {
	- mid -= pkts;
	- continue;
	- }
	- med = (i + 1);
	- break;
	- }
	- mid = cnt_of_mape_rxt / 2;
	- for(i = 0; i < RETRAN_CNT_SIZE; i++) {
	- if (mid > rack->r_ctl.rc_cnt_of_retran[i]) {
	- mid -= rack->r_ctl.rc_cnt_of_retran[i];
	- continue;
	- }
	- alt_med = (i + 1);
	- break;
	- }
	- if (rack->r_ctl.policer_alt_median) {
	- /* Swap the medians */
	- uint32_t swap;
	-
	- swap = med;
	- med = alt_med;
	- alt_med = swap;
	- }
	- if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
	- union tcp_log_stackspecific log;
	- struct timeval tv;
	-
	- memset(&log.u_bbr, 0, sizeof(log.u_bbr));
	- log.u_bbr.timeStamp = tcp_get_usecs(&tv);
	- log.u_bbr.flex1 = avg;
	- log.u_bbr.flex2 = med;
	- log.u_bbr.flex3 = (uint32_t)rxt_per;
	- log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold;
	- log.u_bbr.flex5 = rack->r_ctl.policer_med_threshold;
	- log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold;
	- log.u_bbr.flex7 = rack->r_ctl.policer_alt_median;
	- log.u_bbr.flex8 = 1;
	- log.u_bbr.delivered = rack->r_ctl.policer_bucket_size;
	- log.u_bbr.applimited = rack->r_ctl.current_round;
	- log.u_bbr.epoch = rack->r_ctl.policer_max_seg;
	- log.u_bbr.bw_inuse = del_bw;
	- log.u_bbr.cur_del_rate = rxts;
	- log.u_bbr.delRate = snds;
	- log.u_bbr.rttProp = rack->r_ctl.gp_bw;
	- log.u_bbr.bbr_state = rack->rc_policer_detected;
	- log.u_bbr.bbr_substate = 0;
	- log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
	- log.u_bbr.use_lt_bw = rack->policer_detect_on;
	- log.u_bbr.lt_epoch = (uint32_t)tim;
	- log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery;
	- tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0,
	- 0, &log, false, NULL, NULL, 0, &tv);
	- }
	- if (med == RETRAN_CNT_SIZE) {
	- /*
	- * If the median is the maximum, then what we
	- * likely have here is a network breakage. Either that
	- * or we are so unlucky that all of our traffic is being
	- * dropped and having to be retransmitted the maximum times
	- * and this just is not how a policer works.
	- *
	- * If it is truely a policer eventually we will come
	- * through and it won't be the maximum.
	- */
	- return;
	- }
	- /* Has enough rounds progressed for us to re-measure? */
	- if ((rxt_per >= (uint64_t)rack->r_ctl.policer_rxt_threshold) &&
	- (avg >= rack->r_ctl.policer_avg_threshold) &&
	- (med >= rack->r_ctl.policer_med_threshold)) {
	- /*
	- * We hit all thresholds that indicate we are
	- * being policed. Now we may be doing this from a rack timeout
	- * which then means the rest of recovery will hopefully go
	- * smoother as we pace. At the end of recovery we will
	- * fall back in here and reset the values using the
	- * results of the entire recovery episode (we could also
	- * hit this as we exit recovery as well which means only
	- * one time in here).
	- *
	- * This is done explicitly that if we hit the thresholds
	- * again in a second recovery we overwrite the values. We do
	- * that because over time, as we pace the policer_bucket_size may
	- * continue to grow. This then provides more and more times when
	- * we are not pacing to the policer rate. This lets us compensate
	- * for when we hit a false positive and those flows continue to
	- * increase. However if its a real policer we will then get over its
	- * limit, over time, again and thus end up back here hitting the
	- * thresholds again.
	- *
	- * The alternative to this is to instead whenever we pace due to
	- * policing in rack_policed_sending we could add the amount len paced to the
	- * idle_snd_una value (which decreases the amount in last_amount_before_rec
	- * since that is always [th_ack - idle_snd_una]). This would then prevent
	- * the polcier_bucket_size from growing in additional recovery episodes
	- * Which would then mean false postives would be pretty much stuck
	- * after things got back to normal (assuming that what caused the
	- * false positive was a small network outage).
	- *
	- */
	- tcp_trace_point(rack->rc_tp, TCP_TP_POLICER_DET);
	- if (rack->rc_policer_detected == 0) {
	- /*
	- * Increment the stat that tells us we identified
	- * a policer only once. Note that if we ever allow
	- * the flag to be cleared (reverted) then we need
	- * to adjust this to not do multi-counting.
	- */
	- counter_u64_add(tcp_policer_detected, 1);
	- }
	- rack->r_ctl.last_policer_sndbytes = tp->t_sndbytes;
	- rack->r_ctl.last_policer_snd_rxt_bytes = tp->t_snd_rxt_bytes;
	- rack->r_ctl.policer_bw = del_bw;
	- rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp,
	- rack->r_ctl.policer_bw,
	- min(ctf_fixed_maxseg(rack->rc_tp),
	- rack->r_ctl.rc_pace_min_segs),
	- 0, NULL,
	- NULL, rack->r_ctl.pace_len_divisor);
	- /* Now what about the policer bucket size */
	- rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec;
	- if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) {
	- /* We must be able to send our max-seg or else chaos ensues */
	- rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2;
	- }
	- if (rack->rc_policer_detected == 0)
	- rack->r_ctl.current_policer_bucket = 0;
	- if (tcp_bblogging_on(rack->rc_tp)) {
	- union tcp_log_stackspecific log;
	- struct timeval tv;
	-
	- memset(&log.u_bbr, 0, sizeof(log.u_bbr));
	- log.u_bbr.timeStamp = tcp_get_usecs(&tv);
	- log.u_bbr.flex1 = avg;
	- log.u_bbr.flex2 = med;
	- log.u_bbr.flex3 = rxt_per;
	- log.u_bbr.flex4 = rack->r_ctl.policer_avg_threshold;
	- log.u_bbr.flex5 = rack->r_ctl.policer_med_threshold;
	- log.u_bbr.flex6 = rack->r_ctl.policer_rxt_threshold;
	- log.u_bbr.flex7 = rack->r_ctl.policer_alt_median;
	- log.u_bbr.flex8 = 2;
	- log.u_bbr.applimited = rack->r_ctl.current_round;
	- log.u_bbr.bw_inuse = del_bw;
	- log.u_bbr.delivered = rack->r_ctl.policer_bucket_size;
	- log.u_bbr.cur_del_rate = rxts;
	- log.u_bbr.delRate = snds;
	- log.u_bbr.rttProp = rack->r_ctl.gp_bw;
	- log.u_bbr.bbr_state = rack->rc_policer_detected;
	- log.u_bbr.bbr_substate = 0;
	- log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
	- log.u_bbr.use_lt_bw = rack->policer_detect_on;
	- log.u_bbr.epoch = rack->r_ctl.policer_max_seg;
	- log.u_bbr.lt_epoch = (uint32_t)tim;
	- log.u_bbr.pkts_out = rack->r_ctl.bytes_acked_in_recovery;
	- tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0,
	- 0, &log, false, NULL, NULL, 0, &tv);
	- /*
	- * Put out an added log, 19, for the sole purpose
	- * of getting the txt/rxt so that we can benchmark
	- * in read-bbrlog the ongoing rxt rate after our
	- * policer invocation in the HYSTART announcments.
	- */
	- memset(&log.u_bbr, 0, sizeof(log.u_bbr));
	- log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv);
	- log.u_bbr.flex1 = alt_med;
	- log.u_bbr.flex8 = 19;
	- log.u_bbr.cur_del_rate = tp->t_sndbytes;
	- log.u_bbr.delRate = tp->t_snd_rxt_bytes;
	- tcp_log_event(tp, NULL, NULL, NULL, TCP_POLICER_DET, 0,
	- 0, &log, false, NULL, NULL, 0, &tv);
	- }
	- /* Turn off any fast output, thats ended */
	- rack->r_fast_output = 0;
	- /* Mark the time for credits */
	- rack->r_ctl.last_sendtime = tcp_get_u64_usecs(NULL);
	- if (rack->r_rr_config < 2) {
	- /*
	- * We need to be stricter on the RR config so
	- * the pacing has priority.
	- */
	- rack->r_rr_config = 2;
	- }
	- policer_detection_log(rack,
	- rack->r_ctl.idle_snd_una,
	- rack->r_ctl.ack_for_idle,
	- 0,
	- (uint32_t)tim,
	- 14);
	- rack->rc_policer_detected = 1;
	- } else if ((rack->rc_policer_detected == 1) &&
	- (post_recovery == 1)) {
	- /*
	- * If we are exiting recovery and have already detected
	- * we need to possibly update the values.
	- *
	- * First: Update the idle -> recovery sent value.
	- */
	- uint32_t srtt;
	-
	- if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) {
	- rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec;
	- }
	- srtt = (uint64_t)rack_grab_rtt(tp, rack);
	- if ((tp->t_srtt > 0) && (srtt > tp->t_srtt))
	- srtt = tp->t_srtt;
	- if ((srtt != 0) &&
	- (tim < (uint64_t)srtt)) {
	- /*
	- * Not long enough.
	- */
	- if (rack_verbose_logging)
	- policer_detection_log(rack,
	- (uint32_t)tim,
	- 0,
	- 0,
	- 0,
	- 15);
	- return;
	- }
	- /*
	- * Finally update the b/w if its grown.
	- */
	- if (del_bw > rack->r_ctl.policer_bw) {
	- rack->r_ctl.policer_bw = del_bw;
	- rack->r_ctl.policer_max_seg = tcp_get_pacing_burst_size_w_divisor(rack->rc_tp,
	- rack->r_ctl.policer_bw,
	- min(ctf_fixed_maxseg(rack->rc_tp),
	- rack->r_ctl.rc_pace_min_segs),
	- 0, NULL,
	- NULL, rack->r_ctl.pace_len_divisor);
	- if (rack->r_ctl.policer_bucket_size < rack->r_ctl.policer_max_seg) {
	- /* We must be able to send our max-seg or else chaos ensues */
	- rack->r_ctl.policer_bucket_size = rack->r_ctl.policer_max_seg * 2;
	- }
	- }
	- policer_detection_log(rack,
	- rack->r_ctl.idle_snd_una,
	- rack->r_ctl.ack_for_idle,
	- 0,
	- (uint32_t)tim,
	- 3);
	- }
	-}
	-
	static void
	rack_exit_recovery(struct tcpcb *tp, struct tcp_rack *rack, int how)
	{
	- /* now check with the policer if on */
	- if (rack->policer_detect_on == 1) {
	- policer_detection(tp, rack, 1);
	- }
	/*
	- * Now exit recovery, note we must do the idle set after the policer_detection
	- * to get the amount acked prior to recovery correct.
	+ * Now exit recovery.
	*/
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	EXIT_RECOVERY(tp->t_flags);
	}

	@@ -6260,69 +5745,11 @@
	tp->t_flags &= ~TF_WASFRECOVERY;
	tp->t_flags &= ~TF_WASCRECOVERY;
	if (!IN_FASTRECOVERY(tp->t_flags)) {
	- struct rack_sendmap *rsm;
	- struct timeval tv;
	- uint32_t segsiz;
	-
	/* Check if this is the end of the initial Start-up i.e. initial slow-start */
	if (rack->rc_initial_ss_comp == 0) {
	/* Yep it is the end of the initial slowstart */
	rack->rc_initial_ss_comp = 1;
	}
	- microuptime(&tv);
	- rack->r_ctl.time_entered_recovery = tcp_tv_to_lusectick(&tv);
	- if (SEQ_GEQ(ack, tp->snd_una)) {
	- /*
	- * The ack is above snd_una. Lets see
	- * if we can establish a postive distance from
	- * our idle mark.
	- */
	- rack->r_ctl.ack_for_idle = ack;
	- if (SEQ_GT(ack, rack->r_ctl.idle_snd_una)) {
	- rack->r_ctl.last_amount_before_rec = ack - rack->r_ctl.idle_snd_una;
	- } else {
	- /* No data thru yet */
	- rack->r_ctl.last_amount_before_rec = 0;
	- }
	- } else if (SEQ_GT(tp->snd_una, rack->r_ctl.idle_snd_una)) {
	- /*
	- * The ack is out of order and behind the snd_una. It may
	- * have contained SACK information which we processed else
	- * we would have rejected it.
	- */
	- rack->r_ctl.ack_for_idle = tp->snd_una;
	- rack->r_ctl.last_amount_before_rec = tp->snd_una - rack->r_ctl.idle_snd_una;
	- } else {
	- rack->r_ctl.ack_for_idle = ack;
	- rack->r_ctl.last_amount_before_rec = 0;
	- }
	- if (rack->rc_policer_detected) {
	- /*
	- * If we are being policed and we have a loss, it
	- * means our bucket is now empty. This can happen
	- * where some other flow on the same host sends
	- * that this connection is not aware of.
	- */
	- rack->r_ctl.current_policer_bucket = 0;
	- if (rack_verbose_logging)
	- policer_detection_log(rack, rack->r_ctl.last_amount_before_rec, 0, 0, 0, 4);
	- if (rack->r_ctl.last_amount_before_rec > rack->r_ctl.policer_bucket_size) {
	- rack->r_ctl.policer_bucket_size = rack->r_ctl.last_amount_before_rec;
	- }
	- }
	- memset(rack->r_ctl.rc_cnt_of_retran, 0, sizeof(rack->r_ctl.rc_cnt_of_retran));
	- segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
	- TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) {
	- /*
	- * Go through the outstanding and re-peg
	- * any that should have been left in the
	- * retransmit list (on a double recovery).
	- */
	- if (rsm->r_act_rxt_cnt > 0) {
	- rack_peg_rxt(rack, rsm, segsiz);
	- }
	- }
	- rack->r_ctl.bytes_acked_in_recovery = 0;
	rack->r_ctl.rc_prr_delivered = 0;
	rack->r_ctl.rc_prr_out = 0;
	rack->r_fast_output = 0;
	@@ -6357,8 +5784,6 @@
	rack->r_fast_output = 0;
	if (IN_RECOVERY(tp->t_flags))
	rack_exit_recovery(tp, rack, 2);
	- rack->r_ctl.bytes_acked_in_recovery = 0;
	- rack->r_ctl.time_entered_recovery = 0;
	orig_cwnd = tp->snd_cwnd;
	rack_log_to_prr(rack, 16, orig_cwnd, line);
	if (CC_ALGO(tp)->cong_signal == NULL) {
	@@ -7059,7 +6484,6 @@
	rack->lt_bw_up = 1;
	rack->r_persist_lt_bw_off = 0;
	}
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	rack->rc_in_persist = 0;
	rack->r_ctl.rc_went_idle_time = 0;
	tp->t_rxtshift = 0;
	@@ -7520,16 +6944,6 @@
	0, 0, 0);
	return (1);
	}
	- if ((rack->policer_detect_on == 1) &&
	- (rack->rc_policer_detected == 0)) {
	- /*
	- * We do this early if we have not
	- * deteceted to attempt to detect
	- * quicker. Normally we want to do this
	- * as recovery exits (and we will again).
	- */
	- policer_detection(tp, rack, 0);
	- }
	return (0);
	}

	@@ -8718,86 +8132,6 @@
	}
	}

	-/*
	- * We maintain an array fo 16 (RETRAN_CNT_SIZE) entries. This
	- * array is zeroed at the start of recovery. Each time a segment
	- * is retransmitted, we translate that into a number of packets
	- * (based on segsiz) and based on how many times its been retransmitted
	- * increment by the number of packets the counter that represents
	- * retansmitted N times. Index 0 is retransmitted 1 time, index 1
	- * is retransmitted 2 times etc.
	- *
	- * So for example when we send a 4344 byte transmission with a 1448
	- * byte segsize, and its the third time we have retransmitted this
	- * segment, we would add to the rc_cnt_of_retran[2] the value of
	- * 3. That represents 3 MSS were retransmitted 3 times (index is
	- * the number of times retranmitted minus 1).
	- */
	-static void
	-rack_peg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz)
	-{
	- int idx;
	- uint32_t peg;
	-
	- peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1;
	- peg /= segsiz;
	- idx = rsm->r_act_rxt_cnt - 1;
	- if (idx >= RETRAN_CNT_SIZE)
	- idx = RETRAN_CNT_SIZE - 1;
	- /* Max of a uint16_t retransmits in a bucket */
	- if ((rack->r_ctl.rc_cnt_of_retran[idx] + peg) < 0xffff)
	- rack->r_ctl.rc_cnt_of_retran[idx] += peg;
	- else
	- rack->r_ctl.rc_cnt_of_retran[idx] = 0xffff;
	-}
	-
	-/*
	- * We maintain an array fo 16 (RETRAN_CNT_SIZE) entries. This
	- * array is zeroed at the start of recovery. Each time a segment
	- * is retransmitted, we translate that into a number of packets
	- * (based on segsiz) and based on how many times its been retransmitted
	- * increment by the number of packets the counter that represents
	- * retansmitted N times. Index 0 is retransmitted 1 time, index 1
	- * is retransmitted 2 times etc.
	- *
	- * The rack_unpeg_rxt is used when we go to retransmit a segment
	- * again. Basically if the segment had previously been retransmitted
	- * say 3 times (as our previous example illustrated in the comment
	- * above rack_peg_rxt() prior to calling that and incrementing
	- * r_ack_rxt_cnt we would have called rack_unpeg_rxt() that would
	- * subtract back the previous add from its last rxt (in this
	- * example r_act_cnt would have been 2 for 2 retransmissions. So
	- * we would have subtracted 3 from rc_cnt_of_reetran[1] to remove
	- * those 3 segments. You will see this in the rack_update_rsm()
	- * below where we do:
	- * if (rsm->r_act_rxt_cnt > 0) {
	- * rack_unpeg_rxt(rack, rsm, segsiz);
	- * }
	- * rsm->r_act_rxt_cnt++;
	- * rack_peg_rxt(rack, rsm, segsiz);
	- *
	- * This effectively moves the count from rc_cnt_of_retran[1] to
	- * rc_cnt_of_retran[2].
	- */
	-static void
	-rack_unpeg_rxt(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t segsiz)
	-{
	- int idx;
	- uint32_t peg;
	-
	- idx = rsm->r_act_rxt_cnt - 1;
	- if (idx >= RETRAN_CNT_SIZE)
	- idx = RETRAN_CNT_SIZE - 1;
	- peg = ((rsm->r_end - rsm->r_start) + segsiz) - 1;
	- peg /= segsiz;
	- if (peg < rack->r_ctl.rc_cnt_of_retran[idx])
	- rack->r_ctl.rc_cnt_of_retran[idx] -= peg;
	- else {
	- /* TSNH */
	- rack->r_ctl.rc_cnt_of_retran[idx] = 0;
	- }
	-}
	-
	static void
	rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
	struct rack_sendmap *rsm, uint64_t ts, uint32_t add_flag, int segsiz)
	@@ -8809,13 +8143,8 @@
	rsm->r_rtr_cnt = RACK_NUM_OF_RETRANS;
	rsm->r_flags |= RACK_OVERMAX;
	}
	- if (rsm->r_act_rxt_cnt > 0) {
	- /* Drop the count back for this, its retransmitting again */
	- rack_unpeg_rxt(rack, rsm, segsiz);
	- }
	rsm->r_act_rxt_cnt++;
	/* Peg the count/index */
	- rack_peg_rxt(rack, rsm, segsiz);
	rack_log_retran_reason(rack, rsm, __LINE__, 0, 2);
	rsm->r_dupack = 0;
	if ((rsm->r_rtr_cnt > 1) && ((rsm->r_flags & RACK_TLP) == 0)) {
	@@ -10768,9 +10097,6 @@
	/* Save off the next one for quick reference. */
	nrsm = tqhash_find(rack->r_ctl.tqh, end);
	*prsm = rack->r_ctl.rc_sacklast = nrsm;
	- if (IN_RECOVERY(tp->t_flags)) {
	- rack->r_ctl.bytes_acked_in_recovery += changed;
	- }
	return (changed);
	}

	@@ -11085,10 +10411,6 @@
	rsm->r_in_tmap = 0;
	}
	newly_acked = 1;
	- if (((rsm->r_flags & RACK_ACKED) == 0) &&
	- (IN_RECOVERY(tp->t_flags))) {
	- rack->r_ctl.bytes_acked_in_recovery += (rsm->r_end - rsm->r_start);
	- }
	if (rsm->r_flags & RACK_ACKED) {
	/*
	* It was acked on the scoreboard -- remove
	@@ -11171,10 +10493,6 @@
	*/
	rack->r_ctl.rc_sacked -= (th_ack - rsm->r_start);
	} else {
	- if (((rsm->r_flags & RACK_ACKED) == 0) &&
	- (IN_RECOVERY(tp->t_flags))) {
	- rack->r_ctl.bytes_acked_in_recovery += (th_ack - rsm->r_start);
	- }
	rack_update_pcm_ack(rack, 1, rsm->r_start, th_ack);
	}
	/* And what about the lost flag? */
	@@ -11325,8 +10643,6 @@
	tp->snd_ssthresh = rack->r_ctl.rto_ssthresh;
	}
	}
	- rack->r_ctl.bytes_acked_in_recovery = 0;
	- rack->r_ctl.time_entered_recovery = 0;
	}
	rack->r_might_revert = 0;
	}
	@@ -12717,8 +12033,6 @@
	if (tp->snd_una == tp->snd_max) {
	/* Nothing left outstanding */
	tp->t_flags &= ~TF_PREVVALID;
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	- rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL);
	if (rack->r_ctl.rc_went_idle_time == 0)
	rack->r_ctl.rc_went_idle_time = 1;
	rack->r_ctl.retran_during_recovery = 0;
	@@ -13531,7 +12845,6 @@
	rack->r_ctl.retran_during_recovery = 0;
	rack->rc_suspicious = 0;
	rack->r_ctl.dsack_byte_cnt = 0;
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	rack->r_ctl.rc_went_idle_time = tcp_get_usecs(NULL);
	if (rack->r_ctl.rc_went_idle_time == 0)
	rack->r_ctl.rc_went_idle_time = 1;
	@@ -15250,36 +14563,6 @@
	return (0);
	}

	-static void
	-rack_translate_policer_detect(struct tcp_rack *rack, uint32_t optval)
	-{
	- /*
	- * P = Percent of retransmits 499 = 49.9%
	- * A = Average number 1 (.1%) -> 169 (16.9%)
	- * M = Median number of retrans 1 - 16
	- * MMMM MMMM AAAA AAAA PPPP PPPP PPPP PPPP
	- *
	- */
	- uint16_t per, upp;
	-
	- per = optval & 0x0000ffff;
	- rack->r_ctl.policer_rxt_threshold = (uint32_t)(per & 0xffff);
	- upp = ((optval & 0xffff0000) >> 16);
	- rack->r_ctl.policer_avg_threshold = (0x00ff & upp);
	- rack->r_ctl.policer_med_threshold = ((upp >> 8) & 0x00ff);
	- if ((rack->r_ctl.policer_rxt_threshold > 0) &&
	- (rack->r_ctl.policer_avg_threshold > 0) &&
	- (rack->r_ctl.policer_med_threshold > 0)) {
	- rack->policer_detect_on = 1;
	- } else {
	- rack->policer_detect_on = 0;
	- }
	- rack->r_ctl.saved_policer_val = optval;
	- policer_detection_log(rack, optval,
	- rack->r_ctl.policer_avg_threshold,
	- rack->r_ctl.policer_med_threshold,
	- rack->r_ctl.policer_rxt_threshold, 11);
	-}

	static int32_t
	rack_init(struct tcpcb tp, void *ptr)
	@@ -15351,17 +14634,6 @@
	rack->r_ctl.rc_reorder_fade = rack_reorder_fade;
	rack->rc_allow_data_af_clo = rack_ignore_data_after_close;
	rack->r_ctl.rc_tlp_threshold = rack_tlp_thresh;
	- rack->r_ctl.policer_del_mss = rack_req_del_mss;
	- if ((rack_policer_rxt_thresh > 0) &&
	- (rack_policer_avg_thresh > 0) &&
	- (rack_policer_med_thresh > 0)) {
	- rack->r_ctl.policer_rxt_threshold = rack_policer_rxt_thresh;
	- rack->r_ctl.policer_avg_threshold = rack_policer_avg_thresh;
	- rack->r_ctl.policer_med_threshold = rack_policer_med_thresh;
	- rack->policer_detect_on = 1;
	- } else {
	- rack->policer_detect_on = 0;
	- }
	if (rack_fill_cw_state)
	rack->rc_pace_to_cwnd = 1;
	if (rack_pacing_min_seg)
	@@ -15418,7 +14690,6 @@
	rack->r_ctl.last_tm_mark = 0xffffffffffffffff;
	rack->r_ctl.rc_reorder_shift = rack_reorder_thresh;
	rack->r_ctl.rc_pkt_delay = rack_pkt_delay;
	- rack->r_ctl.pol_bw_comp = rack_policing_do_bw_comp;
	rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp;
	rack->r_ctl.rc_lowest_us_rtt = 0xffffffff;
	rack->r_ctl.rc_highest_us_rtt = 0;
	@@ -15454,7 +14725,6 @@
	if (rack_honors_hpts_min_to)
	rack->r_use_hpts_min = 1;
	if (tp->snd_una != 0) {
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	rack->rc_sendvars_notset = 0;
	/*
	* Make sure any TCP timers are not running.
	@@ -18186,116 +17456,6 @@
	return (slot);
	}

	-static uint32_t
	-rack_policer_check_send(struct tcp_rack rack, uint32_t len, uint32_t segsiz, uint32_t needs)
	-{
	- uint64_t calc;
	-
	- rack->rc_policer_should_pace = 0;
	- calc = rack_policer_bucket_reserve * rack->r_ctl.policer_bucket_size;
	- calc /= 100;
	- /*
	- * Now lets look at if we want more than is in the bucket <or>
	- * we want more than is reserved in the bucket.
	- */
	- if (rack_verbose_logging > 0)
	- policer_detection_log(rack, len, segsiz, calc, rack->r_ctl.current_policer_bucket, 8);
	- if ((calc > rack->r_ctl.current_policer_bucket) \|\|
	- (len >= (rack->r_ctl.current_policer_bucket - calc))) {
	- /*
	- * We may want to pace depending on if we are going
	- * into the reserve or not.
	- */
	- uint32_t newlen;
	-
	- if (calc > rack->r_ctl.current_policer_bucket) {
	- /*
	- * This will eat into the reserve if we
	- * don't have room at all some lines
	- * below will catch it.
	- */
	- newlen = rack->r_ctl.policer_max_seg;
	- rack->rc_policer_should_pace = 1;
	- } else {
	- /*
	- * We have all of the reserve plus something in the bucket
	- * that we can give out.
	- */
	- newlen = rack->r_ctl.current_policer_bucket - calc;
	- if (newlen < rack->r_ctl.policer_max_seg) {
	- /*
	- * Into the reserve to get a full policer_max_seg
	- * so we set the len to that and eat into
	- * the reserve. If we go over the code
	- * below will make us wait.
	- */
	- newlen = rack->r_ctl.policer_max_seg;
	- rack->rc_policer_should_pace = 1;
	- }
	- }
	- if (newlen > rack->r_ctl.current_policer_bucket) {
	- /* We have to wait some */
	- *needs = newlen - rack->r_ctl.current_policer_bucket;
	- return (0);
	- }
	- if (rack_verbose_logging > 0)
	- policer_detection_log(rack, len, segsiz, newlen, 0, 9);
	- len = newlen;
	- } /* else we have all len available above the reserve */
	- if (rack_verbose_logging > 0)
	- policer_detection_log(rack, len, segsiz, calc, 0, 10);
	- return (len);
	-}
	-
	-static uint32_t
	-rack_policed_sending(struct tcp_rack rack, struct tcpcb tp, uint32_t len, uint32_t segsiz, int call_line)
	-{
	- /*
	- * Given a send of len, and a token bucket set at current_policer_bucket_size
	- * are we close enough to the end of the bucket that we need to pace? If so
	- * calculate out a time and return it. Otherwise subtract the tokens from
	- * the bucket.
	- */
	- uint64_t calc;
	-
	- if ((rack->r_ctl.policer_bw == 0) \|\|
	- (rack->r_ctl.policer_bucket_size < segsiz)) {
	- /*
	- * We should have an estimate here...
	- */
	- return (0);
	- }
	- calc = (uint64_t)rack_policer_bucket_reserve * (uint64_t)rack->r_ctl.policer_bucket_size;
	- calc /= 100;
	- if ((rack->r_ctl.current_policer_bucket < len) \|\|
	- (rack->rc_policer_should_pace == 1) \|\|
	- ((rack->r_ctl.current_policer_bucket - len) <= (uint32_t)calc)) {
	- /* we need to pace */
	- uint64_t lentim, res;
	- uint32_t slot;
	-
	- lentim = (uint64_t)len * (uint64_t)HPTS_USEC_IN_SEC;
	- res = lentim / rack->r_ctl.policer_bw;
	- slot = (uint32_t)res;
	- if (rack->r_ctl.current_policer_bucket > len)
	- rack->r_ctl.current_policer_bucket -= len;
	- else
	- rack->r_ctl.current_policer_bucket = 0;
	- policer_detection_log(rack, len, slot, (uint32_t)rack_policer_bucket_reserve, call_line, 5);
	- rack->rc_policer_should_pace = 0;
	- return(slot);
	- }
	- /* Just take tokens out of the bucket and let rack do whatever it would have */
	- policer_detection_log(rack, len, 0, (uint32_t)rack_policer_bucket_reserve, call_line, 6);
	- if (len < rack->r_ctl.current_policer_bucket) {
	- rack->r_ctl.current_policer_bucket -= len;
	- } else {
	- rack->r_ctl.current_policer_bucket = 0;
	- }
	- return (0);
	-}
	-
	-
	static int32_t
	rack_get_pacing_delay(struct tcp_rack rack, struct tcpcb tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line)
	{
	@@ -18311,25 +17471,6 @@
	pace_one = 1;
	else
	pace_one = 0;
	- if (rack->rc_policer_detected == 1) {
	- /*
	- * A policer has been detected and we
	- * have all of our data (policer-bw and
	- * policer bucket size) calculated. Call
	- * into the function to find out if we are
	- * overriding the time.
	- */
	- slot = rack_policed_sending(rack, tp, len, segsiz, line);
	- if (slot) {
	- uint64_t logbw;
	-
	- logbw = rack->r_ctl.current_policer_bucket;
	- logbw <<= 32;
	- logbw \|= rack->r_ctl.policer_bucket_size;
	- rack_log_pacing_delay_calc(rack, len, slot, rack->r_ctl.policer_bw, logbw, 0, 89, __LINE__, NULL, 0);
	- return(slot);
	- }
	- }
	if (rack->rc_always_pace == 0) {
	/*
	* We use the most optimistic possible cwnd/srtt for
	@@ -20536,25 +19677,6 @@
	return (NULL);
	}

	-static void
	-rack_credit_back_policer_idle_time(struct tcp_rack *rack, uint64_t idle_t, int line)
	-{
	- /*
	- * We were idle some time (idle_t) and so our policer bucket
	- * needs to grow. It can go no higher than policer_bucket_size.
	- */
	- uint64_t len;
	-
	- len = idle_t * rack->r_ctl.policer_bw;
	- len /= HPTS_USEC_IN_SEC;
	- rack->r_ctl.current_policer_bucket += (uint32_t)len;
	- if (rack->r_ctl.policer_bucket_size < rack->r_ctl.current_policer_bucket) {
	- rack->r_ctl.current_policer_bucket = rack->r_ctl.policer_bucket_size;
	- }
	- if (rack_verbose_logging > 0)
	- policer_detection_log(rack, (uint32_t)len, line, (uint32_t)idle_t, 0, 7);
	-}
	-
	static inline void
	rack_validate_sizes(struct tcp_rack rack, int32_t len, int32_t segsiz, uint32_t pace_max_seg)
	{
	@@ -20872,34 +19994,12 @@
	}
	}
	}
	- if(rack->policer_detect_on) {
	- /*
	- * If we are doing policer detetion we at a minium
	- * record the time but if possible add back to
	- * the bucket based on the idle time.
	- */
	- uint64_t idle_t, u64_cts;
	-
	- segsiz = min(ctf_fixed_maxseg(tp),
	- rack->r_ctl.rc_pace_min_segs);
	- u64_cts = tcp_tv_to_lusectick(&tv);
	- if ((rack->rc_policer_detected == 1) &&
	- (rack->r_ctl.policer_bucket_size > segsiz) &&
	- (rack->r_ctl.policer_bw > 0) &&
	- (u64_cts > rack->r_ctl.last_sendtime)) {
	- /* We are being policed add back the time */
	- idle_t = u64_cts - rack->r_ctl.last_sendtime;
	- rack_credit_back_policer_idle_time(rack, idle_t, __LINE__);
	- }
	- rack->r_ctl.last_sendtime = u64_cts;
	- }
	if (rack_use_fsb &&
	(rack->r_ctl.fsb.tcp_ip_hdr) &&
	(rack->r_fsb_inited == 0) &&
	(rack->r_state != TCPS_CLOSED))
	rack_init_fsb_block(tp, rack, tcp_outflags[tp->t_state]);
	if (rack->rc_sendvars_notset == 1) {
	- rack->r_ctl.idle_snd_una = tp->snd_una;
	rack->rc_sendvars_notset = 0;
	/*
	* Make sure any TCP timers (keep-alive) is not running.
	@@ -21215,19 +20315,10 @@
	((rsm->r_flags & RACK_HAS_FIN) == 0)) {
	int ret;

	- if ((rack->rc_policer_detected == 1) &&
	- (rack->r_ctl.policer_bucket_size > segsiz) &&
	- (rack->r_ctl.policer_bw > 0)) {
	- /* Check to see if there is room */
	- if (rack->r_ctl.current_policer_bucket < len) {
	- goto skip_fast_output;
	- }
	- }
	ret = rack_fast_rsm_output(tp, rack, rsm, ts_val, cts, ms_cts, &tv, len, doing_tlp);
	if (ret == 0)
	return (0);
	}
	-skip_fast_output:
	so = inp->inp_socket;
	sb = &so->so_snd;
	if (do_a_prefetch == 0) {
	@@ -21418,43 +20509,6 @@
	prefetch_so_done = 1;
	}
	orig_len = len;
	- if ((rack->rc_policer_detected == 1) &&
	- (rack->r_ctl.policer_bucket_size > segsiz) &&
	- (rack->r_ctl.policer_bw > 0) &&
	- (len > 0)) {
	- /*
	- * Ok we believe we have a policer watching
	- * what we send, can we send len? If not can
	- * we tune it down to a smaller value?
	- */
	- uint32_t plen, buck_needs;
	-
	- plen = rack_policer_check_send(rack, len, segsiz, &buck_needs);
	- if (plen == 0) {
	- /*
	- * We are not allowed to send. How long
	- * do we need to pace for i.e. how long
	- * before len is available to send?
	- */
	- uint64_t lentime;
	-
	- lentime = buck_needs;
	- lentime *= HPTS_USEC_IN_SEC;
	- lentime /= rack->r_ctl.policer_bw;
	- slot = (uint32_t)lentime;
	- tot_len_this_send = 0;
	- SOCKBUF_UNLOCK(sb);
	- if (rack_verbose_logging > 0)
	- policer_detection_log(rack, len, slot, buck_needs, 0, 12);
	- rack_start_hpts_timer(rack, tp, cts, slot, 0, 0);
	- rack_log_type_just_return(rack, cts, 0, slot, hpts_calling, 0, cwnd_to_use);
	- goto just_return_clean;
	- }
	- if (plen < len) {
	- sendalot = 0;
	- len = plen;
	- }
	- }
	/*
	* Lop off SYN bit if it has already been sent. However, if this is
	* SYN-SENT state and if segment contains data and if we don't know
	@@ -21853,7 +20907,6 @@
	rack->r_ctl.fsb.recwin = recwin;
	slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__);
	if ((error == 0) &&
	- (rack->rc_policer_detected == 0) &&
	rack_use_rfo &&
	((flags & (TH_SYN\|TH_FIN)) == 0) &&
	(ipoptlen == 0) &&
	@@ -22038,7 +21091,6 @@
	rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack);
	rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use);
	}
	-just_return_clean:
	#ifdef NETFLIX_SHARED_CWND
	if ((sbavail(sb) == 0) &&
	rack->r_ctl.rc_scw) {
	@@ -23498,7 +22550,6 @@
	(rsm == NULL) &&
	(ipoptlen == 0) &&
	(tp->rcv_numsacks == 0) &&
	- (rack->rc_policer_detected == 0) &&
	rack->r_fsb_inited &&
	TCPS_HAVEESTABLISHED(tp->t_state) &&
	((IN_RECOVERY(tp->t_flags)) == 0) &&
	@@ -23909,28 +22960,7 @@
	static int
	rack_stack_information(struct tcpcb tp, struct stack_specific_info si)
	{
	- /*
	- * Gather rack specific information.
	- */
	- struct tcp_rack *rack;
	-
	- rack = (struct tcp_rack *)tp->t_fb_ptr;
	/* We pulled a SSI info log out what was there */
	- policer_detection_log(rack, rack->rc_highly_buffered, 0, 0, 0, 20);
	- if (rack->policer_detect_on) {
	- si->policer_detection_enabled = 1;
	- if (rack->rc_policer_detected) {
	- si->policer_detected = 1;
	- si->policer_bucket_size = rack->r_ctl.policer_bucket_size;
	- si->policer_last_bw = rack->r_ctl.policer_bw;
	- } else {
	- si->policer_detected = 0;
	- si->policer_bucket_size = 0;
	- si->policer_last_bw = 0;
	- }
	- si->current_round = rack->r_ctl.current_round;
	- si->highly_buffered = rack->rc_highly_buffered;
	- }
	si->bytes_transmitted = tp->t_sndbytes;
	si->bytes_retransmitted = tp->t_snd_rxt_bytes;
	return (0);
	@@ -24161,36 +23191,6 @@
	case TCP_RACK_DGP_IN_REC:
	error = EINVAL;
	break;
	- case TCP_POLICER_DETECT: /* URL:pol_det */
	- RACK_OPTS_INC(tcp_pol_detect);
	- rack_translate_policer_detect(rack, optval);
	- break;
	- case TCP_POLICER_MSS:
	- RACK_OPTS_INC(tcp_pol_mss);
	- rack->r_ctl.policer_del_mss = (uint8_t)optval;
	- if (optval & 0x00000100) {
	- /*
	- * Value is setup like so:
	- * VVVV VVVV VVVV VVVV VVVV VVAI MMMM MMMM
	- * Where MMMM MMMM is MSS setting
	- * I (9th bit) is the Postive value that
	- * says it is being set (if its 0 then the
	- * upper bits 11 - 32 have no meaning.
	- * This allows setting it off with
	- * 0x000001MM.
	- *
	- * The 10th bit is used to turn on the
	- * alternate median (not the expanded one).
	- *
	- */
	- rack->r_ctl.pol_bw_comp = (optval >> 10);
	- }
	- if (optval & 0x00000200) {
	- rack->r_ctl.policer_alt_median = 1;
	- } else {
	- rack->r_ctl.policer_alt_median = 0;
	- }
	- break;
	case TCP_RACK_PACE_TO_FILL:
	RACK_OPTS_INC(tcp_fillcw);
	if (optval == 0)
	@@ -24857,43 +23857,6 @@
	dest->r_limit_scw = src->r_limit_scw;
	cnt++;
	}
	- /* TCP_POLICER_DETECT */
	- if (dest->r_ctl.policer_rxt_threshold != src->r_ctl.policer_rxt_threshold) {
	- dest->r_ctl.policer_rxt_threshold = src->r_ctl.policer_rxt_threshold;
	- cnt++;
	- }
	- if (dest->r_ctl.policer_avg_threshold != src->r_ctl.policer_avg_threshold) {
	- dest->r_ctl.policer_avg_threshold = src->r_ctl.policer_avg_threshold;
	- cnt++;
	- }
	- if (dest->r_ctl.policer_med_threshold != src->r_ctl.policer_med_threshold) {
	- dest->r_ctl.policer_med_threshold = src->r_ctl.policer_med_threshold;
	- cnt++;
	- }
	- if (dest->policer_detect_on != src->policer_detect_on) {
	- dest->policer_detect_on = src->policer_detect_on;
	- cnt++;
	- }
	-
	- if (dest->r_ctl.saved_policer_val != src->r_ctl.saved_policer_val) {
	- dest->r_ctl.saved_policer_val = src->r_ctl.saved_policer_val;
	- cnt++;
	- }
	- /* TCP_POLICER_MSS */
	- if (dest->r_ctl.policer_del_mss != src->r_ctl.policer_del_mss) {
	- dest->r_ctl.policer_del_mss = src->r_ctl.policer_del_mss;
	- cnt++;
	- }
	-
	- if (dest->r_ctl.pol_bw_comp != src->r_ctl.pol_bw_comp) {
	- dest->r_ctl.pol_bw_comp = src->r_ctl.pol_bw_comp;
	- cnt++;
	- }
	-
	- if (dest->r_ctl.policer_alt_median != src->r_ctl.policer_alt_median) {
	- dest->r_ctl.policer_alt_median = src->r_ctl.policer_alt_median;
	- cnt++;
	- }
	/* TCP_RACK_PACE_TO_FILL */
	if (dest->rc_pace_to_cwnd != src->rc_pace_to_cwnd) {
	dest->rc_pace_to_cwnd = src->rc_pace_to_cwnd;
	@@ -25345,8 +24308,6 @@
	case TCP_RACK_PACING_BETA_ECN: /* URL:pacing_beta_ecn */
	case TCP_RACK_PACE_TO_FILL: /* URL:fillcw */
	/* End pacing related */
	- case TCP_POLICER_DETECT: /* URL:pol_det */
	- case TCP_POLICER_MSS: /* URL:pol_mss */
	case TCP_DELACK: /* URL:delack (in base TCP i.e. tcp_hints along with cc etc ) */
	case TCP_RACK_PRR_SENDALOT: /* URL:prr_sendalot */
	case TCP_RACK_MIN_TO: /* URL:min_to */
	@@ -25590,12 +24551,6 @@
	case TCP_RACK_HI_BETA:
	optval = rack->rack_hibeta;
	break;
	- case TCP_POLICER_MSS:
	- optval = rack->r_ctl.policer_del_mss;
	- break;
	- case TCP_POLICER_DETECT:
	- optval = rack->r_ctl.saved_policer_val;
	- break;
	case TCP_DEFER_OPTIONS:
	optval = rack->defer_options;
	break;
	diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
	--- a/sys/netinet/tcp_stacks/tcp_rack.h
	+++ b/sys/netinet/tcp_stacks/tcp_rack.h
	@@ -484,12 +484,6 @@
	int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */
	uint64_t last_tmit_time_acked; /* Holds the last cumack point's last send time */
	/* Recovery stats */
	- uint64_t time_entered_recovery;
	- uint64_t bytes_acked_in_recovery;
	- /* Policer Detection */
	- uint64_t last_policer_sndbytes;
	- uint64_t last_policer_snd_rxt_bytes;
	- uint64_t policer_bw;
	uint64_t last_sendtime;

	uint64_t last_gpest;
	@@ -502,19 +496,9 @@
	uint32_t gp_rnd_thresh;
	uint32_t ss_hi_fs;
	uint32_t gate_to_fs;
	- uint32_t policer_max_seg;
	- uint32_t pol_bw_comp;
	- uint16_t policer_rxt_threshold;
	- uint8_t policer_avg_threshold;
	- uint8_t policer_med_threshold;
	uint32_t pcm_max_seg;
	uint32_t last_pcm_round;
	uint32_t pcm_idle_rounds;
	- uint32_t current_policer_bucket;
	- uint32_t policer_bucket_size;
	- uint32_t idle_snd_una;
	- uint32_t ack_for_idle;
	- uint32_t last_amount_before_rec;

	uint32_t rc_gp_srtt; /* Current GP srtt */
	uint32_t rc_prev_gp_srtt; /* Previous RTT */
	@@ -558,7 +542,6 @@
	uint32_t persist_lost_ends;
	uint32_t input_pkt;
	uint32_t saved_input_pkt;
	- uint32_t saved_policer_val; /* The encoded value we used to setup policer detection */
	uint32_t cleared_app_ack_seq;
	uint32_t last_rcv_tstmp_for_rtt;
	uint32_t last_time_of_arm_rcv;
	@@ -578,7 +561,6 @@
	uint16_t rc_cnt_of_retran[RETRAN_CNT_SIZE];
	uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */
	uint16_t rc_reorder_shift; /* Socket option value Lock(a) */
	- uint8_t policer_del_mss; /* How many mss during recovery for policer detection */
	uint8_t rack_per_upper_bound_ss;
	uint8_t rack_per_upper_bound_ca;
	uint8_t cleared_app_ack;
	@@ -590,7 +572,6 @@
	uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */
	uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */
	uint8_t rc_rate_sample_method;
	- uint8_t policer_alt_median; /* Alternate median for policer detection */
	uint8_t full_dgp_in_rec; /* Flag to say if we do full DGP in recovery */
	uint8_t client_suggested_maxseg; /* Not sure what to do with this yet */
	uint8_t use_gp_not_last;
	@@ -792,12 +773,9 @@
	r_collapse_point_valid : 1,
	dgp_on : 1;
	uint16_t rto_from_rec: 1,
	- avail_bit: 1,
	+ avail_bit: 4,
	pcm_in_progress: 1,
	pcm_needed: 1,
	- policer_detect_on: 1, /* Are we detecting policers? */
	- rc_policer_detected : 1, /* We are beiing policed */
	- rc_policer_should_pace : 1, /* The sizing algo thinks we should pace */
	rc_sendvars_notset : 1, /* Inside rack_init send variables (snd_max/una etc) were not set */
	rc_gp_rtt_set : 1,
	rc_gp_dyn_mul : 1,

File Metadata

Mime Type: text/plain
Expires: Sat, Nov 16, 10:42 AM (21 h, 12 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 14657419
Default Alt Text: D45410.diff (52 KB)

D45410.diffNo OneTemporaryActions

D45410.diffView Options

File Metadata

Event Timeline

D45410.diff
No OneTemporary
Actions

D45410.diff
View Options