D39697: tcp_hpts: move HPTS related fields from inpcb to tcpcb
D39697.diff
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -145,7 +145,6 @@
* lock is to be obtained and SMR section exited.
*
* Key:
- * (b) - Protected by the hpts lock.
* (c) - Constant after initialization
* (e) - Protected by the SMR section
* (i) - Protected by the inpcb lock
@@ -154,51 +153,6 @@
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
- * Notes on the tcp_hpts:
- *
- * First Hpts lock order is
- * 1) INP_WLOCK()
- * 2) HPTS_LOCK() i.e. hpts->pmtx
- *
- * To insert a TCB on the hpts you *must* be holding the INP_WLOCK().
- * You may check the inp->inp_in_hpts flag without the hpts lock.
- * The hpts is the only one that will clear this flag holding
- * only the hpts lock. This means that in your tcp_output()
- * routine when you test for the inp_in_hpts flag to be 1
- * it may be transitioning to 0 (by the hpts).
- * That's ok since that will just mean an extra call to tcp_output
- * that most likely will find the call you executed
- * (when the mis-match occurred) will have put the TCB back
- * on the hpts and it will return. If your
- * call did not add the inp back to the hpts then you will either
- * over-send or the cwnd will block you from sending more.
- *
- * Note you should also be holding the INP_WLOCK() when you
- * call the remove from the hpts as well. Though usually
- * you are either doing this from a timer, where you need and have
- * the INP_WLOCK() or from destroying your TCB where again
- * you should already have the INP_WLOCK().
- *
- * The inp_hpts_cpu, inp_hpts_cpu_set, inp_input_cpu and
- * inp_input_cpu_set fields are controlled completely by
- * the hpts. Do not ever set these. The inp_hpts_cpu_set
- * and inp_input_cpu_set fields indicate if the hpts has
- * setup the respective cpu field. It is advised if this
- * field is 0, to enqueue the packet with the appropriate
- * hpts_immediate() call. If the _set field is 1, then
- * you may compare the inp_*_cpu field to the curcpu and
- * may want to again insert onto the hpts if these fields
- * are not equal (i.e. you are not on the expected CPU).
- *
- * A note on inp_hpts_calls and inp_input_calls, these
- * flags are set when the hpts calls either the output
- * or do_segment routines respectively. If the routine
- * being called wants to use this, then it needs to
- * clear the flag before returning. The hpts will not
- * clear the flag. The flags can be used to tell if
- * the hpts is the function calling the respective
- * routine.
- *
* A few other notes:
*
* When a read lock is held, stability of the field is guaranteed; to write
@@ -218,41 +172,15 @@
CK_LIST_ENTRY(inpcb) inp_hash; /* (w:h/r:e) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
-#define inp_start_zero inp_hpts
+#define inp_start_zero inp_refcount
#define inp_zero_size (sizeof(struct inpcb) - \
offsetof(struct inpcb, inp_start_zero))
- TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */
- uint32_t inp_hpts_gencnt; /* XXXGL */
- uint32_t inp_hpts_request; /* Current hpts request, zero if
- * fits in the pacing window (i&b). */
- /*
- * Note the next fields are protected by a
- * different lock (hpts-lock). This means that
- * they must correspond in size to the smallest
- * protectable bit field (uint8_t on x86, and
- * other platfomrs potentially uint32_t?). Also
- * since CPU switches can occur at different times the two
- * fields can *not* be collapsed into a signal bit field.
- */
-#if defined(__amd64__) || defined(__i386__)
- uint8_t inp_in_hpts; /* on output hpts (lock b) */
-#else
- uint32_t inp_in_hpts; /* on output hpts (lock b) */
-#endif
- volatile uint16_t inp_hpts_cpu; /* Lock (i) */
- volatile uint16_t inp_irq_cpu; /* Set by LRO in behalf of or the driver */
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
- uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
- inp_hpts_calls :1, /* (i) from output hpts */
- inp_irq_cpu_set :1, /* (i) from LRO/Driver */
- inp_spare_bits2 : 3;
uint8_t inp_numa_domain; /* numa domain */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
- int32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
- uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
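
Since the TAILQ linkage inp_hpts was the first field wiped on allocation, removing it moves the inp_start_zero marker down to inp_refcount. The macro pair exists so the allocator can clear the variable tail of the structure in one call while the lock and hash linkage above it survive zone reuse. A minimal sketch of the intended use, assuming an allocation path such as in_pcballoc() (the call site itself is not part of this diff):

	/* Wipe everything from inp_refcount to the end of the inpcb;
	 * inp_lock and inp_hash above the marker are left intact. */
	bzero(&inp->inp_start_zero, inp_zero_size);
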
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -1690,7 +1690,6 @@
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
- MPASS(inp->inp_in_hpts == 0);
INP_RUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);
@@ -1707,7 +1706,6 @@
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
- MPASS(inp->inp_in_hpts == 0);
INP_WUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);
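
With the on-wheel state gone from the inpcb, the generic pcb teardown above can no longer assert that a freed pcb has left the pacing wheel. The equivalent guard reappears at the TCP layer, where the new field is visible (see the tcp_subr.c hunk below):

	MPASS(!tcp_in_hpts(tp));	/* tcpcb must be off the wheel before discard */
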
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -111,10 +111,14 @@
*
*/
-
#ifdef _KERNEL
-void tcp_hpts_remove(struct inpcb *);
-bool tcp_in_hpts(struct inpcb *);
+void tcp_hpts_init(struct tcpcb *);
+void tcp_hpts_remove(struct tcpcb *);
+static inline bool
+tcp_in_hpts(struct tcpcb *tp)
+{
+ return (tp->t_in_hpts == IHPTS_ONQUEUE);
+}
/*
* To insert a TCB on the hpts you *must* be holding the
@@ -140,20 +144,18 @@
* that INP_WLOCK() or from destroying your TCB where again
* you should already have the INP_WLOCK().
*/
-uint32_t tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line,
+uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line,
struct hpts_diag *diag);
#define tcp_hpts_insert(inp, slot) \
tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL)
-void __tcp_set_hpts(struct inpcb *inp, int32_t line);
+void __tcp_set_hpts(struct tcpcb *tp, int32_t line);
#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__)
void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
void tcp_run_hpts(void);
-uint16_t hpts_random_cpu(struct inpcb *inp);
-
extern int32_t tcp_min_hptsi_time;
#endif /* _KERNEL */
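
Making tcp_in_hpts() an inline over the three-state t_in_hpts field keeps the hot-path check free of a function call while hiding the enum behind a single predicate. A usage sketch under the documented locking contract; slot here is an assumed caller-computed pacing delay, not something this header provides:

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if (!tcp_in_hpts(tp))
		(void)tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(slot));
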
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -199,7 +199,7 @@
uint8_t p_fill[3]; /* Fill to 32 bits */
/* Cache line 0x40 */
struct hptsh {
- TAILQ_HEAD(, inpcb) head;
+ TAILQ_HEAD(, tcpcb) head;
uint32_t count;
uint32_t gencnt;
} *p_hptss; /* Hptsi wheel */
@@ -273,12 +273,6 @@
int cpu[MAXCPU];
} hpts_domains[MAXMEMDOM];
-enum {
- IHPTS_NONE = 0,
- IHPTS_ONQUEUE,
- IHPTS_MOVING,
-};
-
counter_u64_t hpts_hopelessly_behind;
SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD,
@@ -426,6 +420,17 @@
&tcp_hpts_no_wake_over_thresh, 0,
"When we are over the threshold on the pacer do we prohibit wakeups?");
+static uint16_t
+hpts_random_cpu(void)
+{
+ uint16_t cpuid;
+ uint32_t ran;
+
+ ran = arc4random();
+ cpuid = (((ran & 0xffff) % mp_ncpus) % tcp_pace.rp_num_hptss);
+ return (cpuid);
+}
+
static void
tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,
int slots_to_run, int idx, int from_callout)
@@ -489,54 +494,67 @@
}
static void
-inp_hpts_insert(struct inpcb *inp, struct tcp_hpts_entry *hpts)
+tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts)
{
+ struct inpcb *inp = tptoinpcb(tp);
struct hptsh *hptsh;
INP_WLOCK_ASSERT(inp);
HPTS_MTX_ASSERT(hpts);
- MPASS(hpts->p_cpu == inp->inp_hpts_cpu);
+ MPASS(hpts->p_cpu == tp->t_hpts_cpu);
MPASS(!(inp->inp_flags & INP_DROPPED));
- hptsh = &hpts->p_hptss[inp->inp_hptsslot];
+ hptsh = &hpts->p_hptss[tp->t_hpts_slot];
- if (inp->inp_in_hpts == IHPTS_NONE) {
- inp->inp_in_hpts = IHPTS_ONQUEUE;
+ if (tp->t_in_hpts == IHPTS_NONE) {
+ tp->t_in_hpts = IHPTS_ONQUEUE;
in_pcbref(inp);
- } else if (inp->inp_in_hpts == IHPTS_MOVING) {
- inp->inp_in_hpts = IHPTS_ONQUEUE;
+ } else if (tp->t_in_hpts == IHPTS_MOVING) {
+ tp->t_in_hpts = IHPTS_ONQUEUE;
} else
- MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
- inp->inp_hpts_gencnt = hptsh->gencnt;
+ MPASS(tp->t_in_hpts == IHPTS_ONQUEUE);
+ tp->t_hpts_gencnt = hptsh->gencnt;
- TAILQ_INSERT_TAIL(&hptsh->head, inp, inp_hpts);
+ TAILQ_INSERT_TAIL(&hptsh->head, tp, t_hpts);
hptsh->count++;
hpts->p_on_queue_cnt++;
}
static struct tcp_hpts_entry *
-tcp_hpts_lock(struct inpcb *inp)
+tcp_hpts_lock(struct tcpcb *tp)
{
struct tcp_hpts_entry *hpts;
- INP_LOCK_ASSERT(inp);
+ INP_LOCK_ASSERT(tptoinpcb(tp));
- hpts = tcp_pace.rp_ent[inp->inp_hpts_cpu];
+ hpts = tcp_pace.rp_ent[tp->t_hpts_cpu];
HPTS_LOCK(hpts);
return (hpts);
}
static void
-inp_hpts_release(struct inpcb *inp)
+tcp_hpts_release(struct tcpcb *tp)
{
bool released __diagused;
- inp->inp_in_hpts = IHPTS_NONE;
- released = in_pcbrele_wlocked(inp);
+ tp->t_in_hpts = IHPTS_NONE;
+ released = in_pcbrele_wlocked(tptoinpcb(tp));
MPASS(released == false);
}
+/*
+ * Initialize newborn tcpcb to get ready for use with HPTS.
+ */
+void
+tcp_hpts_init(struct tcpcb *tp)
+{
+
+ tp->t_hpts_cpu = hpts_random_cpu();
+ tp->t_lro_cpu = HPTS_CPU_NONE;
+ MPASS(!(tp->t_flags2 & TF2_HPTS_CPU_SET));
+}
+
/*
* Called normally with the INP_LOCKED but it
* does not matter, the hpts lock is the key
@@ -544,39 +562,39 @@
* INP lock and then get the hpts lock.
*/
void
-tcp_hpts_remove(struct inpcb *inp)
+tcp_hpts_remove(struct tcpcb *tp)
{
struct tcp_hpts_entry *hpts;
struct hptsh *hptsh;
- INP_WLOCK_ASSERT(inp);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
- hpts = tcp_hpts_lock(inp);
- if (inp->inp_in_hpts == IHPTS_ONQUEUE) {
- hptsh = &hpts->p_hptss[inp->inp_hptsslot];
- inp->inp_hpts_request = 0;
- if (__predict_true(inp->inp_hpts_gencnt == hptsh->gencnt)) {
- TAILQ_REMOVE(&hptsh->head, inp, inp_hpts);
+ hpts = tcp_hpts_lock(tp);
+ if (tp->t_in_hpts == IHPTS_ONQUEUE) {
+ hptsh = &hpts->p_hptss[tp->t_hpts_slot];
+ tp->t_hpts_request = 0;
+ if (__predict_true(tp->t_hpts_gencnt == hptsh->gencnt)) {
+ TAILQ_REMOVE(&hptsh->head, tp, t_hpts);
MPASS(hptsh->count > 0);
hptsh->count--;
MPASS(hpts->p_on_queue_cnt > 0);
hpts->p_on_queue_cnt--;
- inp_hpts_release(inp);
+ tcp_hpts_release(tp);
} else {
/*
* tcp_hptsi() now owns the TAILQ head of this inp.
* Can't TAILQ_REMOVE, just mark it.
*/
#ifdef INVARIANTS
- struct inpcb *tmp;
+ struct tcpcb *tmp;
- TAILQ_FOREACH(tmp, &hptsh->head, inp_hpts)
- MPASS(tmp != inp);
+ TAILQ_FOREACH(tmp, &hptsh->head, t_hpts)
+ MPASS(tmp != tp);
#endif
- inp->inp_in_hpts = IHPTS_MOVING;
- inp->inp_hptsslot = -1;
+ tp->t_in_hpts = IHPTS_MOVING;
+ tp->t_hpts_slot = -1;
}
- } else if (inp->inp_in_hpts == IHPTS_MOVING) {
+ } else if (tp->t_in_hpts == IHPTS_MOVING) {
/*
* Handle a special race condition:
* tcp_hptsi() moves inpcb to detached tailq
@@ -585,18 +603,11 @@
* tcp_hpts_remove() again (we are here!), then in_pcbdrop()
* tcp_hptsi() finds pcb with meaningful slot and INP_DROPPED
*/
- inp->inp_hptsslot = -1;
+ tp->t_hpts_slot = -1;
}
HPTS_UNLOCK(hpts);
}
-bool
-tcp_in_hpts(struct inpcb *inp)
-{
-
- return (inp->inp_in_hpts == IHPTS_ONQUEUE);
-}
-
static inline int
hpts_slot(uint32_t wheel_slot, uint32_t plus)
{
@@ -762,15 +773,15 @@
#ifdef INVARIANTS
static void
-check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t inp_hptsslot, int line)
+check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp,
+ uint32_t hptsslot, int line)
{
/*
* Sanity checks for the pacer with invariants
* on insert.
*/
- KASSERT(inp_hptsslot < NUM_OF_HPTSI_SLOTS,
- ("hpts:%p inp:%p slot:%d > max",
- hpts, inp, inp_hptsslot));
+ KASSERT(hptsslot < NUM_OF_HPTSI_SLOTS,
+ ("hpts:%p tp:%p slot:%d > max", hpts, tp, hptsslot));
if ((hpts->p_hpts_active) &&
(hpts->p_wheel_complete == 0)) {
/*
@@ -781,22 +792,21 @@
*/
int distance, yet_to_run;
- distance = hpts_slots_diff(hpts->p_runningslot, inp_hptsslot);
+ distance = hpts_slots_diff(hpts->p_runningslot, hptsslot);
if (hpts->p_runningslot != hpts->p_cur_slot)
yet_to_run = hpts_slots_diff(hpts->p_runningslot, hpts->p_cur_slot);
else
yet_to_run = 0; /* processing last slot */
- KASSERT(yet_to_run <= distance,
- ("hpts:%p inp:%p slot:%d distance:%d yet_to_run:%d rs:%d cs:%d",
- hpts, inp, inp_hptsslot,
- distance, yet_to_run,
- hpts->p_runningslot, hpts->p_cur_slot));
+ KASSERT(yet_to_run <= distance, ("hpts:%p tp:%p slot:%d "
+ "distance:%d yet_to_run:%d rs:%d cs:%d", hpts, tp,
+ hptsslot, distance, yet_to_run, hpts->p_runningslot,
+ hpts->p_cur_slot));
}
}
#endif
uint32_t
-tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag)
+tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_diag *diag)
{
struct tcp_hpts_entry *hpts;
struct timeval tv;
@@ -804,16 +814,16 @@
int32_t wheel_slot, maxslots;
bool need_wakeup = false;
- INP_WLOCK_ASSERT(inp);
- MPASS(!tcp_in_hpts(inp));
- MPASS(!(inp->inp_flags & INP_DROPPED));
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
+ MPASS(!(tptoinpcb(tp)->inp_flags & INP_DROPPED));
+ MPASS(!tcp_in_hpts(tp));
/*
* We now return the next-slot the hpts will be on, beyond its
* current run (if up) or where it was when it stopped if it is
* sleeping.
*/
- hpts = tcp_hpts_lock(inp);
+ hpts = tcp_hpts_lock(tp);
microuptime(&tv);
if (diag) {
memset(diag, 0, sizeof(struct hpts_diag));
@@ -830,20 +840,20 @@
}
if (slot == 0) {
/* Ok we need to set it on the hpts in the current slot */
- inp->inp_hpts_request = 0;
+ tp->t_hpts_request = 0;
if ((hpts->p_hpts_active == 0) || (hpts->p_wheel_complete)) {
/*
* A sleeping hpts we want in next slot to run
* note that in this state p_prev_slot == p_cur_slot
*/
- inp->inp_hptsslot = hpts_slot(hpts->p_prev_slot, 1);
+ tp->t_hpts_slot = hpts_slot(hpts->p_prev_slot, 1);
if ((hpts->p_on_min_sleep == 0) &&
(hpts->p_hpts_active == 0))
need_wakeup = true;
} else
- inp->inp_hptsslot = hpts->p_runningslot;
- if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING))
- inp_hpts_insert(inp, hpts);
+ tp->t_hpts_slot = hpts->p_runningslot;
+ if (__predict_true(tp->t_in_hpts != IHPTS_MOVING))
+ tcp_hpts_insert_internal(tp, hpts);
if (need_wakeup) {
/*
* Activate the hpts if it is sleeping and its
@@ -880,28 +890,28 @@
*/
slot--;
}
- inp->inp_hptsslot = last_slot;
- inp->inp_hpts_request = slot;
+ tp->t_hpts_slot = last_slot;
+ tp->t_hpts_request = slot;
} else if (maxslots >= slot) {
/* It all fits on the wheel */
- inp->inp_hpts_request = 0;
- inp->inp_hptsslot = hpts_slot(wheel_slot, slot);
+ tp->t_hpts_request = 0;
+ tp->t_hpts_slot = hpts_slot(wheel_slot, slot);
} else {
/* It does not fit */
- inp->inp_hpts_request = slot - maxslots;
- inp->inp_hptsslot = last_slot;
+ tp->t_hpts_request = slot - maxslots;
+ tp->t_hpts_slot = last_slot;
}
if (diag) {
- diag->slot_remaining = inp->inp_hpts_request;
- diag->inp_hptsslot = inp->inp_hptsslot;
+ diag->slot_remaining = tp->t_hpts_request;
+ diag->inp_hptsslot = tp->t_hpts_slot;
}
#ifdef INVARIANTS
- check_if_slot_would_be_wrong(hpts, inp, inp->inp_hptsslot, line);
+ check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot, line);
#endif
- if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING))
- inp_hpts_insert(inp, hpts);
+ if (__predict_true(tp->t_in_hpts != IHPTS_MOVING))
+ tcp_hpts_insert_internal(tp, hpts);
if ((hpts->p_hpts_active == 0) &&
- (inp->inp_hpts_request == 0) &&
+ (tp->t_hpts_request == 0) &&
(hpts->p_on_min_sleep == 0)) {
/*
* The hpts is sleeping and NOT on a minimum
@@ -972,54 +982,35 @@
return (slot_on);
}
-uint16_t
-hpts_random_cpu(struct inpcb *inp){
- /*
- * No flow type set distribute the load randomly.
- */
- uint16_t cpuid;
- uint32_t ran;
-
- /*
- * Shortcut if it is already set. XXXGL: does it happen?
- */
- if (inp->inp_hpts_cpu_set) {
- return (inp->inp_hpts_cpu);
- }
- /* Nothing set use a random number */
- ran = arc4random();
- cpuid = (((ran & 0xffff) % mp_ncpus) % tcp_pace.rp_num_hptss);
- return (cpuid);
-}
-
static uint16_t
-hpts_cpuid(struct inpcb *inp, int *failed)
+hpts_cpuid(struct tcpcb *tp, int *failed)
{
+ struct inpcb *inp = tptoinpcb(tp);
u_int cpuid;
#ifdef NUMA
struct hpts_domain_info *di;
#endif
*failed = 0;
- if (inp->inp_hpts_cpu_set) {
- return (inp->inp_hpts_cpu);
+ if (tp->t_flags2 & TF2_HPTS_CPU_SET) {
+ return (tp->t_hpts_cpu);
}
/*
* If we are using the irq cpu set by LRO or
* the driver then it overrides all other domains.
*/
if (tcp_use_irq_cpu) {
- if (inp->inp_irq_cpu_set == 0) {
+ if (tp->t_lro_cpu == HPTS_CPU_NONE) {
*failed = 1;
- return(0);
+ return (0);
}
- return(inp->inp_irq_cpu);
+ return (tp->t_lro_cpu);
}
/* If one is set the other must be the same */
#ifdef RSS
cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
if (cpuid == NETISR_CPUID_NONE)
- return (hpts_random_cpu(inp));
+ return (hpts_random_cpu());
else
return (cpuid);
#endif
@@ -1030,7 +1021,7 @@
*/
if (inp->inp_flowtype == M_HASHTYPE_NONE) {
counter_u64_add(cpu_uses_random, 1);
- return (hpts_random_cpu(inp));
+ return (hpts_random_cpu());
}
/*
* Hash to a thread based on the flowid. If we are using numa,
@@ -1081,12 +1072,10 @@
tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
{
struct tcpcb *tp;
- struct inpcb *inp;
struct timeval tv;
int32_t slots_to_run, i, error;
int32_t loop_cnt = 0;
int32_t did_prefetch = 0;
- int32_t prefetch_ninp = 0;
int32_t prefetch_tp = 0;
int32_t wrap_loop_cnt = 0;
int32_t slot_pos_of_endpoint = 0;
@@ -1154,25 +1143,25 @@
* run them, the extra 10usecs of late (by being
* put behind) does not really matter in this situation.
*/
- TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot].head,
- inp_hpts) {
- MPASS(inp->inp_hptsslot == hpts->p_nxt_slot);
- MPASS(inp->inp_hpts_gencnt ==
+ TAILQ_FOREACH(tp, &hpts->p_hptss[hpts->p_nxt_slot].head,
+ t_hpts) {
+ MPASS(tp->t_hpts_slot == hpts->p_nxt_slot);
+ MPASS(tp->t_hpts_gencnt ==
hpts->p_hptss[hpts->p_nxt_slot].gencnt);
- MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
+ MPASS(tp->t_in_hpts == IHPTS_ONQUEUE);
/*
* Update gencnt and nextslot accordingly to match
* the new location. This is safe since it takes both
* the INP lock and the pacer mutex to change the
- * inp_hptsslot and inp_hpts_gencnt.
+ * t_hpts_slot and t_hpts_gencnt.
*/
- inp->inp_hpts_gencnt =
+ tp->t_hpts_gencnt =
hpts->p_hptss[hpts->p_runningslot].gencnt;
- inp->inp_hptsslot = hpts->p_runningslot;
+ tp->t_hpts_slot = hpts->p_runningslot;
}
TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningslot].head,
- &hpts->p_hptss[hpts->p_nxt_slot].head, inp_hpts);
+ &hpts->p_hptss[hpts->p_nxt_slot].head, t_hpts);
hpts->p_hptss[hpts->p_runningslot].count +=
hpts->p_hptss[hpts->p_nxt_slot].count;
hpts->p_hptss[hpts->p_nxt_slot].count = 0;
@@ -1191,8 +1180,8 @@
goto no_one;
}
for (i = 0; i < slots_to_run; i++) {
- struct inpcb *inp, *ninp;
- TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head);
+ struct tcpcb *tp, *ntp;
+ TAILQ_HEAD(, tcpcb) head = TAILQ_HEAD_INITIALIZER(head);
struct hptsh *hptsh;
uint32_t runningslot;
@@ -1205,20 +1194,54 @@
runningslot = hpts->p_runningslot;
hptsh = &hpts->p_hptss[runningslot];
- TAILQ_SWAP(&head, &hptsh->head, inpcb, inp_hpts);
+ TAILQ_SWAP(&head, &hptsh->head, tcpcb, t_hpts);
hpts->p_on_queue_cnt -= hptsh->count;
hptsh->count = 0;
hptsh->gencnt++;
HPTS_UNLOCK(hpts);
- TAILQ_FOREACH_SAFE(inp, &head, inp_hpts, ninp) {
+ TAILQ_FOREACH_SAFE(tp, &head, t_hpts, ntp) {
+ struct inpcb *inp = tptoinpcb(tp);
bool set_cpu;
- if (ninp != NULL) {
- /* We prefetch the next inp if possible */
- kern_prefetch(ninp, &prefetch_ninp);
- prefetch_ninp = 1;
+ if (ntp != NULL) {
+ /*
+ * If we have a next tcpcb, see if we can
+ * prefetch it. Note this may seem
+ * "risky" since we have no locks (other
+ * than the previous inp) and there no
+ * assurance that ntp was not pulled while
+ * we were processing tp and freed. If this
+ * occurred it could mean that either:
+ *
+ * a) Its NULL (which is fine we won't go
+ * here) <or> b) Its valid (which is cool we
+ * will prefetch it) <or> c) The inp got
+ * freed back to the slab which was
+ * reallocated. Then the piece of memory was
+ * re-used and something else (not an
+ * address) is in inp_ppcb. If that occurs
+ * we don't crash, but take a TLB shootdown
+ * performance hit (same as if it was NULL
+ * and we tried to pre-fetch it).
+ *
+ * Considering that the likelyhood of <c> is
+ * quite rare we will take a risk on doing
+ * this. If performance drops after testing
+ * we can always take this out. NB: the
+ * kern_prefetch on amd64 actually has
+ * protection against a bad address now via
+ * the DMAP_() tests. This will prevent the
+ * TLB hit, and instead if <c> occurs just
+ * cause us to load cache with a useless
+ * address (to us).
+ *
+ * XXXGL: this comment and the prefetch action
+ * could be outdated after tp == inp change.
+ */
+ kern_prefetch(ntp, &prefetch_tp);
+ prefetch_tp = 1;
}
/* For debugging */
@@ -1232,33 +1255,33 @@
}
INP_WLOCK(inp);
- if (inp->inp_hpts_cpu_set == 0) {
+ if ((tp->t_flags2 & TF2_HPTS_CPU_SET) == 0) {
set_cpu = true;
} else {
set_cpu = false;
}
- if (__predict_false(inp->inp_in_hpts == IHPTS_MOVING)) {
- if (inp->inp_hptsslot == -1) {
- inp->inp_in_hpts = IHPTS_NONE;
+ if (__predict_false(tp->t_in_hpts == IHPTS_MOVING)) {
+ if (tp->t_hpts_slot == -1) {
+ tp->t_in_hpts = IHPTS_NONE;
if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
} else {
HPTS_LOCK(hpts);
- inp_hpts_insert(inp, hpts);
+ tcp_hpts_insert_internal(tp, hpts);
HPTS_UNLOCK(hpts);
INP_WUNLOCK(inp);
}
continue;
}
- MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
+ MPASS(tp->t_in_hpts == IHPTS_ONQUEUE);
MPASS(!(inp->inp_flags & INP_DROPPED));
- KASSERT(runningslot == inp->inp_hptsslot,
+ KASSERT(runningslot == tp->t_hpts_slot,
("Hpts:%p inp:%p slot mis-aligned %u vs %u",
- hpts, inp, runningslot, inp->inp_hptsslot));
+ hpts, inp, runningslot, tp->t_hpts_slot));
- if (inp->inp_hpts_request) {
+ if (tp->t_hpts_request) {
/*
* This guy is deferred out further in time
* then our wheel had available on it.
@@ -1268,38 +1291,36 @@
uint32_t maxslots, last_slot, remaining_slots;
remaining_slots = slots_to_run - (i + 1);
- if (inp->inp_hpts_request > remaining_slots) {
+ if (tp->t_hpts_request > remaining_slots) {
HPTS_LOCK(hpts);
/*
* How far out can we go?
*/
maxslots = max_slots_available(hpts,
hpts->p_cur_slot, &last_slot);
- if (maxslots >= inp->inp_hpts_request) {
+ if (maxslots >= tp->t_hpts_request) {
/* We can place it finally to
* be processed. */
- inp->inp_hptsslot = hpts_slot(
+ tp->t_hpts_slot = hpts_slot(
hpts->p_runningslot,
- inp->inp_hpts_request);
- inp->inp_hpts_request = 0;
+ tp->t_hpts_request);
+ tp->t_hpts_request = 0;
} else {
/* Work off some more time */
- inp->inp_hptsslot = last_slot;
- inp->inp_hpts_request -=
+ tp->t_hpts_slot = last_slot;
+ tp->t_hpts_request -=
maxslots;
}
- inp_hpts_insert(inp, hpts);
+ tcp_hpts_insert_internal(tp, hpts);
HPTS_UNLOCK(hpts);
INP_WUNLOCK(inp);
continue;
}
- inp->inp_hpts_request = 0;
+ tp->t_hpts_request = 0;
/* Fall through we will so do it now */
}
- inp_hpts_release(inp);
- tp = intotcpcb(inp);
- MPASS(tp);
+ tcp_hpts_release(tp);
if (set_cpu) {
/*
* Setup so the next time we will move to
@@ -1318,7 +1339,7 @@
* gets added to the hpts (not this one)
* :-)
*/
- tcp_set_hpts(inp);
+ tcp_set_hpts(tp);
}
CURVNET_SET(inp->inp_vnet);
/* Lets do any logging that we might want to */
@@ -1331,16 +1352,17 @@
did_prefetch = 1;
}
/*
- * We set inp_hpts_calls to 1 before any possible output.
- * The contract with the transport is that if it cares about
- * hpts calling it should clear the flag. That way next time
- * it is called it will know it is hpts.
+ * We set TF2_HPTS_CALLS before any possible output.
+ * The contract with the transport is that if it cares
+ * about hpts calling it should clear the flag. That
+ * way next time it is called it will know it is hpts.
*
- * We also only call tfb_do_queued_segments() <or> tcp_output()
- * it is expected that if segments are queued and come in that
- * the final input mbuf will cause a call to output if it is needed.
+ * We also only call tfb_do_queued_segments() <or>
+ * tcp_output(). It is expected that if segments are
+ * queued and come in that the final input mbuf will
+ * cause a call to output if it is needed.
*/
- inp->inp_hpts_calls = 1;
+ tp->t_flags2 |= TF2_HPTS_CALLS;
if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) &&
!STAILQ_EMPTY(&tp->t_inqueue)) {
error = (*tp->t_fb->tfb_do_queued_segments)(tp, 0);
@@ -1353,44 +1375,6 @@
if (error < 0)
goto skip_pacing;
}
- if (ninp) {
- /*
- * If we have a nxt inp, see if we can
- * prefetch it. Note this may seem
- * "risky" since we have no locks (other
- * than the previous inp) and there no
- * assurance that ninp was not pulled while
- * we were processing inp and freed. If this
- * occurred it could mean that either:
- *
- * a) Its NULL (which is fine we won't go
- * here) <or> b) Its valid (which is cool we
- * will prefetch it) <or> c) The inp got
- * freed back to the slab which was
- * reallocated. Then the piece of memory was
- * re-used and something else (not an
- * address) is in inp_ppcb. If that occurs
- * we don't crash, but take a TLB shootdown
- * performance hit (same as if it was NULL
- * and we tried to pre-fetch it).
- *
- * Considering that the likelyhood of <c> is
- * quite rare we will take a risk on doing
- * this. If performance drops after testing
- * we can always take this out. NB: the
- * kern_prefetch on amd64 actually has
- * protection against a bad address now via
- * the DMAP_() tests. This will prevent the
- * TLB hit, and instead if <c> occurs just
- * cause us to load cache with a useless
- * address (to us).
- *
- * XXXGL: with tcpcb == inpcb, I'm unsure this
- * prefetch is still correct and useful.
- */
- kern_prefetch(ninp, &prefetch_tp);
- prefetch_tp = 1;
- }
INP_WUNLOCK(inp);
skip_pacing:
CURVNET_RESTORE();
@@ -1492,18 +1476,18 @@
}
void
-__tcp_set_hpts(struct inpcb *inp, int32_t line)
+__tcp_set_hpts(struct tcpcb *tp, int32_t line)
{
struct tcp_hpts_entry *hpts;
int failed;
- INP_WLOCK_ASSERT(inp);
- hpts = tcp_hpts_lock(inp);
- if ((inp->inp_in_hpts == 0) &&
- (inp->inp_hpts_cpu_set == 0)) {
- inp->inp_hpts_cpu = hpts_cpuid(inp, &failed);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
+
+ hpts = tcp_hpts_lock(tp);
+ if (tp->t_in_hpts == IHPTS_NONE && !(tp->t_flags2 & TF2_HPTS_CPU_SET)) {
+ tp->t_hpts_cpu = hpts_cpuid(tp, &failed);
if (failed == 0)
- inp->inp_hpts_cpu_set = 1;
+ tp->t_flags2 |= TF2_HPTS_CPU_SET;
}
mtx_unlock(&hpts->p_mtx);
}
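
The subtlest invariant in this file is the IHPTS_MOVING state: tcp_hptsi() swaps an entire slot's tailq off the wheel and bumps the slot's gencnt before dropping the HPTS mutex, so a concurrent tcp_hpts_remove() can meet a tcpcb whose list linkage it no longer owns. The generation check tells the two cases apart. A condensed restatement of the remover side from the hunks above, entered with both the INP write lock and the HPTS lock held:

	if (tp->t_hpts_gencnt == hptsh->gencnt) {
		/* Still linked on the live wheel slot. */
		TAILQ_REMOVE(&hptsh->head, tp, t_hpts);
		tcp_hpts_release(tp);		/* IHPTS_NONE, drops the pcb ref */
	} else {
		/* tcp_hptsi() owns the detached tailq; just mark it. */
		tp->t_in_hpts = IHPTS_MOVING;
		tp->t_hpts_slot = -1;		/* ask the pacer to drop it */
	}
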
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -1380,10 +1380,8 @@
INP_WUNLOCK(inp);
return (TCP_LRO_CANNOT);
}
- if ((inp->inp_irq_cpu_set == 0) && (lc->lro_cpu_is_set == 1)) {
- inp->inp_irq_cpu = lc->lro_last_cpu;
- inp->inp_irq_cpu_set = 1;
- }
+ if (tp->t_lro_cpu == HPTS_CPU_NONE && lc->lro_cpu_is_set == 1)
+ tp->t_lro_cpu = lc->lro_last_cpu;
/* Check if the transport doesn't support the needed optimizations. */
if ((inp->inp_flags2 & (INP_SUPPORTS_MBUFQ | INP_MBUF_ACKCMP)) == 0) {
INP_WUNLOCK(inp);
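
Collapsing inp_irq_cpu and its inp_irq_cpu_set bit into the single t_lro_cpu field, with HPTS_CPU_NONE ((uint16_t)-1) standing in for "unset", removes one of the cross-lock bitfields the deleted in_pcb.h commentary had to warn about. The consumer side, restated from the hpts_cpuid() hunk above:

	if (tcp_use_irq_cpu) {
		if (tp->t_lro_cpu == HPTS_CPU_NONE) {
			*failed = 1;		/* LRO has not recorded a CPU yet */
			return (0);
		}
		return (tp->t_lro_cpu);
	}
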
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -739,7 +739,7 @@
int32_t delay_calc = 0;
uint32_t prev_delay = 0;
- if (tcp_in_hpts(inp)) {
+ if (tcp_in_hpts(tp)) {
/* A previous call is already set up */
return;
}
@@ -904,14 +904,14 @@
inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE;
bbr->rc_pacer_started = cts;
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(slot),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot),
__LINE__, &diag);
bbr->rc_timer_first = 0;
bbr->bbr_timer_src = frm;
bbr_log_to_start(bbr, cts, hpts_timeout, slot, 1);
bbr_log_hpts_diag(bbr, cts, &diag);
} else if (hpts_timeout) {
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(hpts_timeout),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
__LINE__, &diag);
/*
* We add the flag here as well if the slot is set,
@@ -1050,8 +1050,8 @@
*/
wrong_timer:
if ((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) {
- if (tcp_in_hpts(inp))
- tcp_hpts_remove(inp);
+ if (tcp_in_hpts(tp))
+ tcp_hpts_remove(tp);
bbr_timer_cancel(bbr, __LINE__, cts);
bbr_start_hpts_timer(bbr, tp, cts, 1, bbr->r_ctl.rc_last_delay_val,
0);
@@ -1875,7 +1875,7 @@
l->lt_epoch = bbr->r_ctl.rc_lt_epoch;
l->pacing_gain = bbr->r_ctl.rc_bbr_hptsi_gain;
l->cwnd_gain = bbr->r_ctl.rc_bbr_cwnd_gain;
- l->inhpts = tcp_in_hpts(bbr->rc_inp);
+ l->inhpts = tcp_in_hpts(bbr->rc_tp);
l->use_lt_bw = bbr->rc_lt_use_bw;
l->pkts_out = bbr->r_ctl.rc_flight_at_input;
l->pkt_epoch = bbr->r_ctl.rc_pkt_epoch;
@@ -2496,7 +2496,7 @@
log.u_bbr.flex2 = to;
log.u_bbr.flex3 = bbr->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = slot;
- log.u_bbr.flex5 = bbr->rc_inp->inp_hptsslot;
+ log.u_bbr.flex5 = bbr->rc_tp->t_hpts_slot;
log.u_bbr.flex6 = TICKS_2_USEC(bbr->rc_tp->t_rxtcur);
log.u_bbr.pkts_out = bbr->rc_inp->inp_flags2;
log.u_bbr.flex8 = which;
@@ -3953,7 +3953,7 @@
bbr->rc_tlp_rtx_out = 0;
bbr->r_ctl.recovery_lr = bbr->r_ctl.rc_pkt_epoch_loss_rate;
tcp_bbr_tso_size_check(bbr, bbr->r_ctl.rc_rcvtime);
- if (tcp_in_hpts(bbr->rc_inp) &&
+ if (tcp_in_hpts(bbr->rc_tp) &&
((bbr->r_ctl.rc_hpts_flags & PACE_TMR_RACK) == 0)) {
/*
* When we enter recovery, we need to restart
@@ -5209,7 +5209,7 @@
left = bbr->r_ctl.rc_timer_exp - cts;
ret = -3;
bbr_log_to_processing(bbr, cts, ret, left, hpts_calling);
- tcp_hpts_insert(tptoinpcb(tp), HPTS_USEC_TO_SLOTS(left));
+ tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(left));
return (1);
}
bbr->rc_tmr_stopped = 0;
@@ -5240,7 +5240,7 @@
if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
uint8_t hpts_removed = 0;
- if (tcp_in_hpts(bbr->rc_inp) &&
+ if (tcp_in_hpts(bbr->rc_tp) &&
(bbr->rc_timer_first == 1)) {
/*
* If we are canceling timer's when we have the
@@ -5248,7 +5248,7 @@
* must remove ourselves from the hpts.
*/
hpts_removed = 1;
- tcp_hpts_remove(bbr->rc_inp);
+ tcp_hpts_remove(bbr->rc_tp);
if (bbr->r_ctl.rc_last_delay_val) {
/* Update the last hptsi delay too */
uint32_t time_since_send;
@@ -7920,8 +7920,8 @@
* don't want to transfer forward the time
* for our sum's calculations.
*/
- if (tcp_in_hpts(bbr->rc_inp)) {
- tcp_hpts_remove(bbr->rc_inp);
+ if (tcp_in_hpts(bbr->rc_tp)) {
+ tcp_hpts_remove(bbr->rc_tp);
bbr->rc_timer_first = 0;
bbr->r_ctl.rc_hpts_flags = 0;
bbr->r_ctl.rc_last_delay_val = 0;
@@ -9854,8 +9854,8 @@
/* We enter in persists, set the flag appropriately */
bbr->rc_in_persist = 1;
}
- if (tcp_in_hpts(bbr->rc_inp)) {
- tcp_hpts_remove(bbr->rc_inp);
+ if (tcp_in_hpts(bbr->rc_tp)) {
+ tcp_hpts_remove(bbr->rc_tp);
}
}
@@ -11437,7 +11437,7 @@
}
/* Set the flag */
bbr->r_is_v6 = (inp->inp_vflag & INP_IPV6) != 0;
- tcp_set_hpts(inp);
+ tcp_set_hpts(tp);
sack_filter_clear(&bbr->r_ctl.bbr_sf, th->th_ack);
}
if (thflags & TH_ACK) {
@@ -11546,7 +11546,7 @@
*/
if ((tp->snd_max == tp->snd_una) &&
((tp->t_flags & TF_DELACK) == 0) &&
- (tcp_in_hpts(bbr->rc_inp)) &&
+ (tcp_in_hpts(tp)) &&
(bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
/*
* keep alive not needed if we are hptsi
@@ -11554,8 +11554,8 @@
*/
;
} else {
- if (tcp_in_hpts(bbr->rc_inp)) {
- tcp_hpts_remove(bbr->rc_inp);
+ if (tcp_in_hpts(tp)) {
+ tcp_hpts_remove(tp);
if ((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
(TSTMP_GT(lcts, bbr->rc_pacer_started))) {
uint32_t del;
@@ -11582,8 +11582,8 @@
bbr_timer_audit(tp, bbr, lcts, &so->so_snd);
}
/* Clear the flag, it may have been cleared by output but we may not have */
- if ((nxt_pkt == 0) && (inp->inp_hpts_calls))
- inp->inp_hpts_calls = 0;
+ if ((nxt_pkt == 0) && (tp->t_flags2 & TF2_HPTS_CALLS))
+ tp->t_flags2 &= ~TF2_HPTS_CALLS;
/* Do we have a new state */
if (bbr->r_state != tp->t_state)
bbr_set_state(tp, bbr, tiwin);
@@ -11842,7 +11842,7 @@
int32_t slot = 0;
struct inpcb *inp;
struct sockbuf *sb;
- uint32_t hpts_calling;
+ bool hpts_calling;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int32_t isipv6;
@@ -11853,8 +11853,8 @@
memcpy(&bbr->rc_tv, tv, sizeof(struct timeval));
cts = tcp_tv_to_usectick(&bbr->rc_tv);
inp = bbr->rc_inp;
- hpts_calling = inp->inp_hpts_calls;
- inp->inp_hpts_calls = 0;
+ hpts_calling = !!(tp->t_flags2 & TF2_HPTS_CALLS);
+ tp->t_flags2 &= ~TF2_HPTS_CALLS;
so = inp->inp_socket;
sb = &so->so_snd;
if (tp->t_nic_ktls_xmit)
@@ -11884,7 +11884,7 @@
}
#endif
if (((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) &&
- tcp_in_hpts(inp)) {
+ tcp_in_hpts(tp)) {
/*
* We are on the hpts for some timer but not hptsi output.
* Possibly remove from the hpts so we can send/recv etc.
@@ -11913,7 +11913,7 @@
return (0);
}
}
- tcp_hpts_remove(inp);
+ tcp_hpts_remove(tp);
bbr_timer_cancel(bbr, __LINE__, cts);
}
if (bbr->r_ctl.rc_last_delay_val) {
@@ -11929,9 +11929,9 @@
if ((bbr->r_timer_override) ||
(tp->t_state < TCPS_ESTABLISHED)) {
/* Timeouts or early states are exempt */
- if (tcp_in_hpts(inp))
- tcp_hpts_remove(inp);
- } else if (tcp_in_hpts(inp)) {
+ if (tcp_in_hpts(tp))
+ tcp_hpts_remove(tp);
+ } else if (tcp_in_hpts(tp)) {
if ((bbr->r_ctl.rc_last_delay_val) &&
(bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
delay_calc) {
@@ -11943,10 +11943,10 @@
*/
counter_u64_add(bbr_out_size[TCP_MSS_ACCT_LATE], 1);
bbr->r_ctl.rc_last_delay_val = 0;
- tcp_hpts_remove(inp);
+ tcp_hpts_remove(tp);
} else if (tp->t_state == TCPS_CLOSED) {
bbr->r_ctl.rc_last_delay_val = 0;
- tcp_hpts_remove(inp);
+ tcp_hpts_remove(tp);
} else {
/*
* On the hpts, you shall not pass! even if ACKNOW
@@ -14088,7 +14088,7 @@
inp->inp_flags2 |= INP_CANNOT_DO_ECN;
inp->inp_flags2 |= INP_SUPPORTS_MBUFQ;
tcp_change_time_units(tp, TCP_TMR_GRANULARITY_TICKS);
- if (inp->inp_in_hpts) {
+ if (tp->t_in_hpts > IHPTS_NONE) {
return;
}
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
@@ -14109,7 +14109,7 @@
}
} else
toval = HPTS_TICKS_PER_SLOT;
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(toval),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
__LINE__, &diag);
bbr_log_hpts_diag(bbr, cts, &diag);
}
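
Both stacks keep the old inp_hpts_calls contract, now expressed as the TF2_HPTS_CALLS flag: the pacer sets it before calling into the transport, and the transport samples and clears it on entry, because the pacer never will. A sketch of the transport side, in the shape the bbr_output()/rack_output() hunks use:

	bool hpts_calling;

	hpts_calling = !!(tp->t_flags2 & TF2_HPTS_CALLS);
	tp->t_flags2 &= ~TF2_HPTS_CALLS;	/* pacer-set, transport-cleared */
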
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -2568,7 +2568,7 @@
log.u_bbr.flex5 = rsm->r_start;
log.u_bbr.flex6 = rsm->r_end;
log.u_bbr.flex8 = mod;
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@@ -2594,7 +2594,7 @@
log.u_bbr.flex2 = to;
log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = slot;
- log.u_bbr.flex5 = rack->rc_inp->inp_hptsslot;
+ log.u_bbr.flex5 = rack->rc_tp->t_hpts_slot;
log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;
log.u_bbr.flex7 = rack->rc_in_persist;
log.u_bbr.flex8 = which;
@@ -2602,7 +2602,7 @@
log.u_bbr.pkts_out = 0;
else
log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@@ -2629,7 +2629,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex8 = to_num;
log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt;
log.u_bbr.flex2 = rack->rc_rack_rtt;
@@ -2667,7 +2667,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.flex8 = flag;
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.cur_del_rate = (uint64_t)prev;
log.u_bbr.delRate = (uint64_t)rsm;
log.u_bbr.rttProp = (uint64_t)next;
@@ -2711,7 +2711,7 @@
union tcp_log_stackspecific log;
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = t;
log.u_bbr.flex2 = len;
log.u_bbr.flex3 = rack->r_ctl.rc_rack_min_rtt;
@@ -2883,7 +2883,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = tick;
log.u_bbr.flex3 = tp->t_maxunacktime;
@@ -2909,7 +2909,7 @@
union tcp_log_stackspecific log;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = slot;
if (rack->rack_no_prr)
log.u_bbr.flex2 = 0;
@@ -2957,7 +2957,7 @@
log.u_bbr.flex7 <<= 1;
log.u_bbr.flex7 |= rack->r_wanted_output; /* Do we want output */
log.u_bbr.flex8 = rack->rc_in_persist;
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.use_lt_bw = rack->r_ent_rec_ns;
@@ -3010,7 +3010,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = slot;
log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = reason;
@@ -3043,7 +3043,7 @@
union tcp_log_stackspecific log;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = rack->r_ctl.rc_last_output_to;
log.u_bbr.flex3 = flags_on_entry;
@@ -4893,7 +4893,7 @@
rack->r_ctl.rc_app_limited_cnt,
0, 0, 10, __LINE__, NULL, quality);
}
- if (tcp_in_hpts(rack->rc_inp) &&
+ if (tcp_in_hpts(rack->rc_tp) &&
(rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
/*
* Ok we can't trust the pacer in this case
@@ -4903,7 +4903,7 @@
* Stop the pacer and clear up all the aggregate
* delays etc.
*/
- tcp_hpts_remove(rack->rc_inp);
+ tcp_hpts_remove(rack->rc_tp);
rack->r_ctl.rc_hpts_flags = 0;
rack->r_ctl.rc_last_output_to = 0;
}
@@ -6495,8 +6495,8 @@
struct timeval tv;
uint32_t t_time;
- if (tcp_in_hpts(rack->rc_inp)) {
- tcp_hpts_remove(rack->rc_inp);
+ if (tcp_in_hpts(rack->rc_tp)) {
+ tcp_hpts_remove(rack->rc_tp);
rack->r_ctl.rc_hpts_flags = 0;
}
#ifdef NETFLIX_SHARED_CWND
@@ -6634,7 +6634,7 @@
(tp->t_state == TCPS_LISTEN)) {
return;
}
- if (tcp_in_hpts(inp)) {
+ if (tcp_in_hpts(tp)) {
/* Already on the pacer */
return;
}
@@ -6885,12 +6885,12 @@
* Arrange for the hpts to kick back in after the
* t-o if the t-o does not cause a send.
*/
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(hpts_timeout),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
__LINE__, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
rack_log_to_start(rack, cts, hpts_timeout, slot, 0);
} else {
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(slot),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot),
__LINE__, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
rack_log_to_start(rack, cts, hpts_timeout, slot, 1);
@@ -6905,7 +6905,7 @@
* at the start of this block) are good enough.
*/
rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(hpts_timeout),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
__LINE__, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
rack_log_to_start(rack, cts, hpts_timeout, slot, 0);
@@ -8028,7 +8028,7 @@
rack->rc_inp->inp_flags2 &= ~INP_DONT_SACK_QUEUE;
ret = -3;
left = rack->r_ctl.rc_timer_exp - cts;
- tcp_hpts_insert(tptoinpcb(tp), HPTS_MS_TO_SLOTS(left));
+ tcp_hpts_insert(tp, HPTS_MS_TO_SLOTS(left));
rack_log_to_processing(rack, cts, ret, left);
return (1);
}
@@ -8069,7 +8069,7 @@
if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) &&
((TSTMP_GEQ(us_cts, rack->r_ctl.rc_last_output_to)) ||
((tp->snd_max - tp->snd_una) == 0))) {
- tcp_hpts_remove(rack->rc_inp);
+ tcp_hpts_remove(rack->rc_tp);
hpts_removed = 1;
/* If we were not delayed cancel out the flag. */
if ((tp->snd_max - tp->snd_una) == 0)
@@ -8078,14 +8078,14 @@
}
if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
rack->rc_tmr_stopped = rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK;
- if (tcp_in_hpts(rack->rc_inp) &&
+ if (tcp_in_hpts(rack->rc_tp) &&
((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)) {
/*
* Canceling timer's when we have no output being
* paced. We also must remove ourselves from the
* hpts.
*/
- tcp_hpts_remove(rack->rc_inp);
+ tcp_hpts_remove(rack->rc_tp);
hpts_removed = 1;
}
rack->r_ctl.rc_hpts_flags &= ~(PACE_TMR_MASK);
@@ -8113,8 +8113,8 @@
/* We enter in persists, set the flag appropriately */
rack->rc_in_persist = 1;
}
- if (tcp_in_hpts(rack->rc_inp)) {
- tcp_hpts_remove(rack->rc_inp);
+ if (tcp_in_hpts(rack->rc_tp)) {
+ tcp_hpts_remove(rack->rc_tp);
}
}
@@ -11383,7 +11383,7 @@
(entered_recovery == 0)) {
rack_update_prr(tp, rack, changed, th_ack);
if ((rsm && (rack->r_ctl.rc_prr_sndcnt >= ctf_fixed_maxseg(tp)) &&
- ((tcp_in_hpts(rack->rc_inp) == 0) &&
+ ((tcp_in_hpts(rack->rc_tp) == 0) &&
((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0)))) {
/*
* If you are pacing output you don't want
@@ -14572,7 +14572,7 @@
inp->inp_flags2 &= ~INP_SUPPORTS_MBUFQ;
if (rack->r_use_cmp_ack && TCPS_HAVEESTABLISHED(tp->t_state))
rack->rc_inp->inp_flags2 |= INP_MBUF_ACKCMP;
- if (inp->inp_in_hpts) {
+ if (tp->t_in_hpts > IHPTS_NONE) {
/* Strange */
return;
}
@@ -14593,7 +14593,7 @@
}
} else
toval = HPTS_TICKS_PER_SLOT;
- (void)tcp_hpts_insert_diag(inp, HPTS_USEC_TO_SLOTS(toval),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
__LINE__, &diag);
rack_log_hpts_diag(rack, cts, &diag, &tv);
}
@@ -15190,7 +15190,7 @@
if (tov) {
struct hpts_diag diag;
- (void)tcp_hpts_insert_diag(rack->rc_inp, HPTS_USEC_TO_SLOTS(tov),
+ (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(tov),
__LINE__, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &rack->r_ctl.act_rcv_time);
}
@@ -15476,7 +15476,7 @@
* We will force the hpts to be stopped if any, and restart
* with the slot set to what was in the saved slot.
*/
- if (tcp_in_hpts(rack->rc_inp)) {
+ if (tcp_in_hpts(rack->rc_tp)) {
if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
uint32_t us_cts;
@@ -15487,7 +15487,7 @@
}
rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
}
- tcp_hpts_remove(rack->rc_inp);
+ tcp_hpts_remove(rack->rc_tp);
}
rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
rack_start_hpts_timer(rack, tp, tcp_get_usecs(NULL), 0, 0, 0);
@@ -15568,7 +15568,7 @@
}
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@@ -16427,8 +16427,8 @@
}
did_out = 1;
}
- if (rack->rc_inp->inp_hpts_calls)
- rack->rc_inp->inp_hpts_calls = 0;
+ if (tp->t_flags2 & TF2_HPTS_CALLS)
+ tp->t_flags2 &= ~TF2_HPTS_CALLS;
rack_free_trim(rack);
#ifdef TCP_ACCOUNTING
sched_unpin();
@@ -16662,7 +16662,7 @@
}
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@@ -16889,7 +16889,7 @@
#endif
return (1);
}
- tcp_set_hpts(inp);
+ tcp_set_hpts(tp);
sack_filter_clear(&rack->r_ctl.rack_sf, th->th_ack);
}
if (thflags & TH_FIN)
@@ -16988,7 +16988,7 @@
rack_free_trim(rack);
} else if ((no_output == 1) &&
(nxt_pkt == 0) &&
- (tcp_in_hpts(rack->rc_inp) == 0)) {
+ (tcp_in_hpts(rack->rc_tp) == 0)) {
/*
* We are not in hpts and we had a pacing timer up. Use
* the remaining time (slot_remaining) to restart the timer.
@@ -16998,8 +16998,8 @@
rack_free_trim(rack);
}
/* Clear the flag, it may have been cleared by output but we may not have */
- if ((nxt_pkt == 0) && (inp->inp_hpts_calls))
- inp->inp_hpts_calls = 0;
+ if ((nxt_pkt == 0) && (tp->t_flags2 & TF2_HPTS_CALLS))
+ tp->t_flags2 &= ~TF2_HPTS_CALLS;
/* Update any rounds needed */
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp))
rack_log_hystart_event(rack, high_seq, 8);
@@ -17033,13 +17033,13 @@
/* We could not send (probably in the hpts but stopped the timer earlier)? */
if ((tp->snd_max == tp->snd_una) &&
((tp->t_flags & TF_DELACK) == 0) &&
- (tcp_in_hpts(rack->rc_inp)) &&
+ (tcp_in_hpts(rack->rc_tp)) &&
(rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT)) {
/* keep alive not needed if we are hptsi output yet */
;
} else {
int late = 0;
- if (tcp_in_hpts(inp)) {
+ if (tcp_in_hpts(tp)) {
if (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) {
us_cts = tcp_get_usecs(NULL);
if (TSTMP_GT(rack->r_ctl.rc_last_output_to, us_cts)) {
@@ -17049,7 +17049,7 @@
late = 1;
rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
}
- tcp_hpts_remove(inp);
+ tcp_hpts_remove(tp);
}
if (late && (did_out == 0)) {
/*
@@ -18063,7 +18063,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = error;
log.u_bbr.flex2 = flags;
log.u_bbr.flex3 = rsm_is_null;
@@ -18328,7 +18328,7 @@
err = in_pcbquery_txrtlmt(rack->rc_inp, &p_rate);
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = p_rate;
log.u_bbr.flex2 = p_queue;
log.u_bbr.flex4 = (uint32_t)rack->r_ctl.crte->using;
@@ -18393,7 +18393,7 @@
out:
if (tcp_bblogging_on(tp)) {
memset(&log, 0, sizeof(log));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
log.u_bbr.flex1 = p_rate;
log.u_bbr.flex2 = p_queue;
log.u_bbr.flex4 = (uint32_t)rack->r_ctl.crte->using;
@@ -18758,7 +18758,7 @@
counter_u64_add(rack_collapsed_win_rxt_bytes, (rsm->r_end - rsm->r_start));
}
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@@ -19291,7 +19291,7 @@
union tcp_log_stackspecific log;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@@ -19623,7 +19623,7 @@
uint32_t cts, ms_cts, delayed, early;
uint16_t add_flag = RACK_SENT_SP;
/* The doing_tlp flag will be set by the actual rack_timeout_tlp() */
- uint8_t hpts_calling, doing_tlp = 0;
+ uint8_t doing_tlp = 0;
uint32_t cwnd_to_use, pace_max_seg;
int32_t do_a_prefetch = 0;
int32_t prefetch_rsm = 0;
@@ -19641,7 +19641,7 @@
struct ip6_hdr *ip6 = NULL;
int32_t isipv6;
#endif
- bool hw_tls = false;
+ bool hpts_calling, hw_tls = false;
NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
@@ -19652,8 +19652,8 @@
sched_pin();
ts_val = get_cyclecount();
#endif
- hpts_calling = inp->inp_hpts_calls;
- rack->rc_inp->inp_hpts_calls = 0;
+ hpts_calling = !!(tp->t_flags2 & TF2_HPTS_CALLS);
+ tp->t_flags2 &= ~TF2_HPTS_CALLS;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
#ifdef TCP_ACCOUNTING
@@ -19696,7 +19696,7 @@
cts = tcp_get_usecs(&tv);
ms_cts = tcp_tv_to_mssectick(&tv);
if (((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) &&
- tcp_in_hpts(rack->rc_inp)) {
+ tcp_in_hpts(rack->rc_tp)) {
/*
* We are on the hpts for some timer but not hptsi output.
* Remove from the hpts unconditionally.
@@ -19730,7 +19730,7 @@
}
}
if (rack->rc_in_persist) {
- if (tcp_in_hpts(rack->rc_inp) == 0) {
+ if (tcp_in_hpts(rack->rc_tp) == 0) {
/* Timer is not running */
rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
}
@@ -19742,7 +19742,7 @@
if ((rack->rc_ack_required == 1) &&
(rack->r_timer_override == 0)){
/* A timeout occurred and no ack has arrived */
- if (tcp_in_hpts(rack->rc_inp) == 0) {
+ if (tcp_in_hpts(rack->rc_tp) == 0) {
/* Timer is not running */
rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
}
@@ -19756,9 +19756,9 @@
(delayed) ||
(tp->t_state < TCPS_ESTABLISHED)) {
rack->rc_ack_can_sendout_data = 0;
- if (tcp_in_hpts(rack->rc_inp))
- tcp_hpts_remove(rack->rc_inp);
- } else if (tcp_in_hpts(rack->rc_inp)) {
+ if (tcp_in_hpts(rack->rc_tp))
+ tcp_hpts_remove(rack->rc_tp);
+ } else if (tcp_in_hpts(rack->rc_tp)) {
/*
* On the hpts you can't pass even if ACKNOW is on, we will
* when the hpts fires.
@@ -21672,7 +21672,7 @@
union tcp_log_stackspecific log;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
+ log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
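
One distinction both stacks rely on: tcp_in_hpts() is true only for IHPTS_ONQUEUE, while the stack-attach paths above test t_in_hpts > IHPTS_NONE so that a tcpcb in the transient IHPTS_MOVING state also counts as busy, since the pacer still holds a reference to it. In sketch form:

	if (tp->t_in_hpts > IHPTS_NONE)		/* ONQUEUE or MOVING */
		return;				/* the pacer is still involved */
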
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2148,7 +2148,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.inhpts = inp->inp_in_hpts;
+ log.u_bbr.inhpts = tcp_in_hpts(tp);
log.u_bbr.flex8 = 4;
log.u_bbr.pkts_out = tp->t_maxseg;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
@@ -2315,11 +2315,7 @@
*/
inp->inp_ip_ttl = V_ip_defttl;
#ifdef TCPHPTS
- /*
- * If using hpts lets drop a random number in so
- * not all new connections fall on the same CPU.
- */
- inp->inp_hpts_cpu = hpts_random_cpu(inp);
+ tcp_hpts_init(tp);
#endif
#ifdef TCPPCAP
/*
@@ -2438,6 +2434,7 @@
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+ MPASS(!tcp_in_hpts(tp));
/*
* If we got enough samples through the srtt filter,
@@ -2530,7 +2527,7 @@
tp->t_tfo_pending = NULL;
}
#ifdef TCPHPTS
- tcp_hpts_remove(inp);
+ tcp_hpts_remove(tp);
#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
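
tcp_hpts_init() replaces the open-coded CPU seeding that tcp_newtcpcb() used to do: the random pick still spreads fresh connections across the pacer threads, and the LRO-derived CPU now starts at the explicit HPTS_CPU_NONE sentinel instead of carrying a separate _set bit. Restated from the tcp_hpts.c hunk above:

	tp->t_hpts_cpu = hpts_random_cpu();	/* spread new connections */
	tp->t_lro_cpu = HPTS_CPU_NONE;		/* until LRO reports a CPU */
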
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1712,7 +1712,7 @@
*/
#ifdef TCPHPTS
/* Assure that we are not on any hpts */
- tcp_hpts_remove(tptoinpcb(tp));
+ tcp_hpts_remove(tp);
#endif
if (blk->tfb_tcp_fb_init) {
error = (*blk->tfb_tcp_fb_init)(tp, &ptr);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -314,6 +314,23 @@
sbintime_t t_timers[TT_N];
sbintime_t t_precisions[TT_N];
+ /* HPTS. Used by BBR and Rack stacks. See tcp_hpts.c for more info. */
+ TAILQ_ENTRY(tcpcb) t_hpts; /* linkage to HPTS ring */
+ STAILQ_HEAD(, mbuf) t_inqueue; /* HPTS input packets queue */
+ uint32_t t_hpts_request; /* Current hpts request, zero if
+ * fits in the pacing window. */
+ uint32_t t_hpts_slot; /* HPTS wheel slot this tcb is. */
+ uint32_t t_hpts_drop_reas; /* Reason we are dropping the pcb. */
+ uint32_t t_hpts_gencnt;
+ uint16_t t_hpts_cpu; /* CPU chosen by hpts_cpuid(). */
+ uint16_t t_lro_cpu; /* CPU derived from LRO. */
+#define HPTS_CPU_NONE ((uint16_t)-1)
+ enum {
+ IHPTS_NONE = 0,
+ IHPTS_ONQUEUE,
+ IHPTS_MOVING,
+ } t_in_hpts; /* Is it linked into HPTS? */
+
uint32_t t_maxseg:24, /* maximum segment size */
_t_logstate:8; /* State of "black box" logging */
uint32_t t_port:16, /* Tunneling (over udp) port */
@@ -355,7 +372,6 @@
int t_segqlen; /* segment reassembly queue length */
uint32_t t_segqmbuflen; /* total reassembly queue byte length */
struct tsegqe_head t_segq; /* segment reassembly queue */
- STAILQ_HEAD(, mbuf) t_inqueue; /* HPTS input queue */
uint32_t snd_ssthresh; /* snd_cwnd size threshold for
* for slow start exponential to
* linear switch
@@ -832,9 +848,11 @@
#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
+#define TF2_HPTS_CPU_SET 0x00000200 /* t_hpts_cpu is not random */
#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
#define TF2_ECN_USE_ECT1 0x00000800 /* Use ECT(1) marking on session */
#define TF2_TCP_ACCOUNTING 0x00010000 /* Do TCP accounting */
+#define TF2_HPTS_CALLS 0x00020000 /* tcp_output() called via HPTS */
/*
* Structure to hold TCP options that are only used during segment
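
Taken together with the tcp_hpts.c hunks, the t_in_hpts enum gives the tcpcb an explicit lifecycle on the wheel. A quick reference of the transitions, restated from the code above rather than part of the diff itself:

	IHPTS_NONE    -> IHPTS_ONQUEUE	insert; in_pcbref() taken
	IHPTS_ONQUEUE -> IHPTS_NONE	remove or pacer run; ref dropped
	IHPTS_ONQUEUE -> IHPTS_MOVING	remove raced a detached slot queue
	IHPTS_MOVING  -> IHPTS_ONQUEUE	pacer re-inserts (t_hpts_slot >= 0)
	IHPTS_MOVING  -> IHPTS_NONE	pacer drops it (t_hpts_slot == -1)
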