Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F115507651
D26672.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
21 KB
Referenced Files
None
Subscribers
None
D26672.diff
View Options
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -129,6 +129,24 @@
#define inc6_zoneid inc_ie.ie6_zoneid
#if defined(_KERNEL) || defined(_WANT_INPCB)
+
+#define LBSTATE_HASHSIZE 32 /* buckets in a group's state table; a power of two so the mask below works */
+#define LBSTATE_HASHMASK (LBSTATE_HASHSIZE - 1)
+#define INP_LBSTATE_HASH(g, h) ((g)->il_htbl[(h) & LBSTATE_HASHMASK]) /* bucket of group g selected by hash h */
+
+VNET_DECLARE(int, udp_lbstate_lifetime); /* per-vnet state lifetime, compared against time_uptime deltas */
+#define V_udp_lbstate_lifetime VNET(udp_lbstate_lifetime)
+
+struct inpcb_lbstate { /* one sticky load-balancing state: an endpoint pair pinned to a PCB */
+ CK_LIST_ENTRY(inpcb_lbstate) pcbchain; /* link on the owning inpcb's inp_lbstates list */
+ CK_LIST_ENTRY(inpcb_lbstate) grpchain; /* link in one bucket of the group's il_htbl */
+ struct in_endpoints ie; /* local/foreign address and port the state is matched on */
+ uint32_t ts; /* time_uptime when the state was created or last refreshed */
+ struct inpcb *inp; /* PCB returned by the lookup when this state matches */
+ struct epoch_context epoch_ctx; /* lets the state be freed through NET_EPOCH_CALL() */
+};
+CK_LIST_HEAD(inpcb_lbstatehead, inpcb_lbstate); /* head type used for both chains above */
+
/*
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
@@ -294,6 +312,9 @@
struct label *inp_label; /* (i) MAC label */
struct inpcbpolicy *inp_sp; /* (s) for IPSEC */
+ struct inpcb_lbstatehead inp_lbstates; /* LB states chain */
+ uint32_t inp_lbscnt; /* LB states count */
+
/* Protocol-dependent part; options. */
struct {
u_char inp_ip_tos; /* (i) type of service proto */
@@ -572,6 +593,16 @@
#define il6_laddr il_dependladdr.id6_addr
uint32_t il_inpsiz; /* max count in il_inp[] (h) */
uint32_t il_inpcnt; /* cur count in il_inp[] (h) */
+ struct inpcbinfo *il_pcbinfo;
+
+ /*
+ * The callout, lock and hash table are allocated only when a new
+ * load balance group is created; a resized group inherits them.
+ */
+ struct callout *il_callout;
+ struct mtx *il_lock;
+ struct inpcb_lbstatehead *il_htbl;
+
struct inpcb *il_inp[]; /* (h) */
};
@@ -821,6 +852,8 @@
void in_pcbgroup_update(struct inpcb *);
void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
+void in_pcblbstate_update(struct inpcb *, const struct in_addr *, uint16_t,
+ const struct in_addr *, uint16_t);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -115,6 +115,24 @@
static struct callout ipport_tick_callout;
+#if 0 /* change to 1 to compile in printf tracing of load-balancing state events */
+#define LBDEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG1(fmt, ...) do { \
+ char _addr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0) /* LBDEBUG1 declares _addr[] as scratch space for the caller's arguments */
+#define LBDEBUG2(fmt, ...) do { \
+ char _laddr[50], _faddr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0) /* LBDEBUG2 declares _laddr[]/_faddr[], which callers pass to inet_ntop() */
+#else /* tracing disabled: the macros expand to nothing and their arguments are not evaluated */
+#define LBDEBUG(fmt, ...)
+#define LBDEBUG1(fmt, ...)
+#define LBDEBUG2(fmt, ...)
+#endif
+
/*
* These configure the range of local port addresses assigned to
* "unspecified" outgoing connections/packets/whatever.
@@ -145,6 +163,7 @@
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
static void in_pcbremlists(struct inpcb *inp);
+static void in_pcblbstate_free(epoch_context_t ctx);
#ifdef INET
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
struct in_addr faddr, u_int fport_arg,
@@ -265,6 +284,53 @@
}
static void
+lbstate_tick(void *arg) /* callout handler: drop expired LB states of one group, then re-arm itself */
+{
+ struct inpcblbgroup *grp;
+ struct inpcb_lbstate *s, *ts;
+ int i;
+
+ grp = arg; /* the callout argument is the load balance group */
+ CURVNET_SET(grp->il_pcbinfo->ipi_vnet); /* V_udp_lbstate_lifetime below is a per-vnet variable */
+ for (i = 0; i < LBSTATE_HASHSIZE; i++) {
+ CK_LIST_FOREACH_SAFE(s, &grp->il_htbl[i], grpchain, ts) { /* _SAFE: s is unlinked inside the loop */
+ if (time_uptime - s->ts < V_udp_lbstate_lifetime)
+ continue; /* refreshed recently enough, keep it */
+#if 0 /* disabled trace of the state that is about to expire */
+ switch (INP_SOCKAF(s->inp->inp_socket)) {
+ case AF_INET:
+ LBDEBUG2("expired state: %s:%u -> %s:%u",
+ inet_ntop(AF_INET,
+ &s->ie.ie_laddr, _laddr, sizeof(_laddr)),
+ ntohs(s->ie.ie_lport),
+ inet_ntop(AF_INET,
+ &s->ie.ie_faddr, _faddr, sizeof(_faddr)),
+ ntohs(s->ie.ie_fport));
+ break;
+ case AF_INET6:
+ LBDEBUG2("expired state: %s:%u -> %s:%u",
+ inet_ntop(AF_INET6,
+ &s->ie.ie6_laddr, _laddr, sizeof(_laddr)),
+ ntohs(s->ie.ie_lport),
+ inet_ntop(AF_INET6,
+ &s->ie.ie6_faddr, _faddr, sizeof(_faddr)),
+ ntohs(s->ie.ie_fport));
+ break;
+ default:
+ LBDEBUG("expired state");
+ }
+#endif
+ CK_LIST_REMOVE(s, grpchain); /* unlink from the group's hash bucket */
+ CK_LIST_REMOVE(s, pcbchain); /* unlink from the owning PCB's list */
+ s->inp->inp_lbscnt--; /* keep the PCB's state count in step with its list */
+ NET_EPOCH_CALL(in_pcblbstate_free, &s->epoch_ctx); /* lookups run under the net epoch, so the free is deferred */
+ }
+ }
+ CURVNET_RESTORE();
+ callout_reset(grp->il_callout, hz, lbstate_tick, grp); /* re-arm after hz ticks; NOTE(review): the callout is set up with callout_init_mtx(), so il_lock is presumably held for the whole handler - confirm */
+}
+
+static void
in_pcblbgroup_free_deferred(epoch_context_t ctx)
{
struct inpcblbgroup *grp;
@@ -281,7 +347,46 @@
NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx);
}
+
+/*
+ * Allocate a brand new load balance group together with the per-group
+ * state machinery: the mutex protecting the state table, the expiry
+ * callout and the state hash table itself.  The callout is armed to fire
+ * lbstate_tick() after hz ticks.
+ *
+ * Returns the new group, or NULL if any allocation fails; on failure
+ * everything allocated so far is released again.
+ *
+ * NOTE(review): the callout is armed here, but il_pcbinfo (dereferenced
+ * by lbstate_tick()) is only assigned by the caller after this function
+ * returns - confirm the handler cannot run before that assignment.
+ */
static struct inpcblbgroup *
+in_pcblbgroup_alloc0(struct inpcblbgrouphead *hdr, u_char vflag,
+    uint16_t port, const union in_dependaddr *addr, int size)
+{
+	struct inpcblbgroup *grp;
+
+	grp = in_pcblbgroup_alloc(hdr, vflag, port, addr, size);
+	if (grp == NULL)
+		return (NULL);
+
+	grp->il_lock = malloc(sizeof(*grp->il_lock), M_PCB, M_NOWAIT);
+	if (grp->il_lock == NULL)
+		goto fail_grp;
+
+	grp->il_callout = malloc(sizeof(*grp->il_callout), M_PCB, M_NOWAIT);
+	if (grp->il_callout == NULL)
+		goto fail_lock;
+
+	/* Zeroed memory gives LBSTATE_HASHSIZE empty list heads. */
+	grp->il_htbl = malloc(LBSTATE_HASHSIZE * sizeof(*grp->il_htbl),
+	    M_PCB, M_NOWAIT | M_ZERO);
+	if (grp->il_htbl == NULL)
+		goto fail_callout;
+
+	/* MTX_NEW: the mutex memory above was not zeroed. */
+	mtx_init(grp->il_lock, "inplbhtbl", NULL, MTX_DEF | MTX_NEW);
+	callout_init_mtx(grp->il_callout, grp->il_lock, 0);
+
+	/* A callout with an associated mutex is (re)armed with it held. */
+	mtx_lock(grp->il_lock);
+	callout_reset(grp->il_callout, hz, lbstate_tick, grp);
+	mtx_unlock(grp->il_lock);
+
+	LBDEBUG("new lbgroup for port %u", ntohs(port));
+	return (grp);
+
+fail_callout:
+	free(grp->il_callout, M_PCB);
+fail_lock:
+	free(grp->il_lock, M_PCB);
+fail_grp:
+	in_pcblbgroup_free(grp);
+	return (NULL);
+}
+
+static struct inpcblbgroup *
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
struct inpcblbgroup *old_grp, int size)
{
@@ -299,7 +404,17 @@
for (i = 0; i < old_grp->il_inpcnt; ++i)
grp->il_inp[i] = old_grp->il_inp[i];
+ grp->il_pcbinfo = old_grp->il_pcbinfo;
grp->il_inpcnt = old_grp->il_inpcnt;
+ /* Inherit lock and htbl from old group */
+ grp->il_lock = old_grp->il_lock;
+ grp->il_htbl = old_grp->il_htbl;
+ grp->il_callout = old_grp->il_callout;
+
+ /* Reschedule callout with new group */
+ mtx_lock(grp->il_lock);
+ callout_reset(grp->il_callout, hz, lbstate_tick, grp);
+ mtx_unlock(grp->il_lock);
in_pcblbgroup_free(old_grp);
return (grp);
}
@@ -375,11 +490,12 @@
}
if (grp == NULL) {
/* Create new load balance group. */
- grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
+ grp = in_pcblbgroup_alloc0(hdr, inp->inp_vflag,
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
INPCBLBGROUP_SIZMIN);
if (grp == NULL)
return (ENOBUFS);
+ grp->il_pcbinfo = pcbinfo;
} else if (grp->il_inpcnt == grp->il_inpsiz) {
if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
if (ratecheck(&lastprint, &interval))
@@ -403,7 +519,57 @@
return (0);
}
+/*
+ * Epoch callback that releases one load balancing state.  It is passed
+ * the epoch_ctx member embedded in the state and frees the enclosing
+ * struct inpcb_lbstate.
+ */
+static void
+in_pcblbstate_free(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcb_lbstate, epoch_ctx), M_PCB);
+}
+
/*
+ * Free LB states related to PCB.
+ *
+ * Every state linked on inp->inp_lbstates is unlinked from both its group
+ * hash bucket and the PCB list while holding the group's state lock, and
+ * is then handed to in_pcblbstate_free() through NET_EPOCH_CALL().
+ *
+ * inp_lbscnt is decremented for each state removed, exactly as
+ * lbstate_tick() does on expiry; otherwise the PCB would keep a stale
+ * non-zero count that in_pcblookup_lbgroup() uses when picking the least
+ * loaded socket.
+ */
+static void
+in_pcblbstates_destroy(struct inpcblbgroup *grp, struct inpcb *inp)
+{
+	struct inpcb_lbstate *s;
+
+	mtx_lock(grp->il_lock);
+	while (!CK_LIST_EMPTY(&inp->inp_lbstates)) {
+		s = CK_LIST_FIRST(&inp->inp_lbstates);
+		CK_LIST_REMOVE(s, grpchain);
+		CK_LIST_REMOVE(s, pcbchain);
+		inp->inp_lbscnt--;
+		NET_EPOCH_CALL(in_pcblbstate_free, &s->epoch_ctx);
+	}
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Tear down the state machinery of a load balance group that is going
+ * away: stop the expiry callout, release every state still present in
+ * the hash table, then destroy and free the lock, callout and table.
+ *
+ * Each removed state also decrements the owning PCB's inp_lbscnt, as
+ * lbstate_tick() does, so the PCB does not keep a stale count after its
+ * states are gone.
+ *
+ * NOTE(review): callout_stop() does not wait for a handler that has
+ * already been dispatched and may be blocked on il_lock, yet the mutex
+ * and the callout are destroyed and freed right after.  callout(9) asks
+ * for callout_drain() before a callout or its lock is destroyed, but
+ * that may sleep and cannot be called with il_lock held - confirm what
+ * the caller's locking context permits.
+ */
+static void
+in_pcblbhtbl_free(struct inpcblbgroup *grp)
+{
+	struct inpcb_lbstate *s;
+	int i;
+
+	mtx_lock(grp->il_lock);
+	callout_stop(grp->il_callout);
+	for (i = 0; i < LBSTATE_HASHSIZE; i++) {
+		while (!CK_LIST_EMPTY(&grp->il_htbl[i])) {
+			s = CK_LIST_FIRST(&grp->il_htbl[i]);
+			CK_LIST_REMOVE(s, grpchain);
+			CK_LIST_REMOVE(s, pcbchain);
+			s->inp->inp_lbscnt--;
+			NET_EPOCH_CALL(in_pcblbstate_free, &s->epoch_ctx);
+		}
+	}
+	mtx_unlock(grp->il_lock);
+	mtx_destroy(grp->il_lock);
+	free(grp->il_callout, M_PCB);
+	free(grp->il_lock, M_PCB);
+	free(grp->il_htbl, M_PCB);
+}
+
+/*
* Remove PCB from load balance group.
*/
static void
@@ -428,10 +594,12 @@
if (grp->il_inpcnt == 1) {
/* We are the last, free this local group. */
+ in_pcblbhtbl_free(grp);
in_pcblbgroup_free(grp);
} else {
/* Pull up inpcbs, shrink group if possible. */
in_pcblbgroup_reorder(hdr, &grp, i);
+ in_pcblbstates_destroy(grp, inp);
}
return;
}
@@ -2005,31 +2173,28 @@
}
#undef INP_LOOKUP_MAPPED_PCB_COST
-static struct inpcb *
-in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
- uint16_t fport, int lookupflags)
+static struct inpcblbgroup *
+in_pcblbgroup_lookup(const struct inpcbinfo *pcbinfo,
+ const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+ uint16_t fport, int lookupflags)
{
- struct inpcb *local_wild;
const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- uint32_t idx;
+ struct inpcblbgroup *grp, *grp_local_wild;
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
/*
- * Order of socket selection:
+ * Order of group selection:
* 1. non-wild.
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
*
* NOTE:
* - Load balanced group does not contain jailed sockets
- * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
+ * - Load balanced group does not contain IPv4 mapped INET6
+ * wild sockets
*/
- local_wild = NULL;
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
+ grp_local_wild = NULL;
CK_LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET6
if (!(grp->il_vflag & INP_IPV4))
@@ -2038,15 +2203,123 @@
if (grp->il_lport != lport)
continue;
- idx = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport) %
- grp->il_inpcnt;
if (grp->il_laddr.s_addr == laddr->s_addr)
- return (grp->il_inp[idx]);
+ return (grp);
+
if (grp->il_laddr.s_addr == INADDR_ANY &&
- (lookupflags & INPLOOKUP_WILDCARD) != 0)
- local_wild = grp->il_inp[idx];
+ (lookupflags & INPLOOKUP_WILDCARD))
+ grp_local_wild = grp;
}
- return (local_wild);
+ return (grp_local_wild);
+}
+
+/*
+ * Search the group's state table for an IPv4 state matching the given
+ * foreign address and port.  The local side of a state is compared
+ * against the group's own il_laddr/il_lport.
+ *
+ * hash selects the bucket (see INP_LBSTATE_HASH).  Must be called inside
+ * the network epoch.  Returns the matching state, or NULL if there is
+ * none.
+ */
+static struct inpcb_lbstate *
+in_pcblbstate_lookup(struct inpcblbgroup *grp, uint32_t hash,
+    const struct in_addr *faddr, uint16_t fport)
+{
+	struct inpcb_lbstate *s;
+
+	NET_EPOCH_ASSERT();
+	CK_LIST_FOREACH(s, &INP_LBSTATE_HASH(grp, hash), grpchain) {
+		if (s->ie.ie_faddr.s_addr != faddr->s_addr ||
+		    s->ie.ie_fport != fport ||
+		    s->ie.ie_laddr.s_addr != grp->il_laddr.s_addr ||
+		    s->ie.ie_lport != grp->il_lport)
+			continue;
+		/* Ports go through ntohs(), as in the other traces. */
+		LBDEBUG2("matched state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET,
+			&grp->il_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET,
+			faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+		return (s);
+	}
+	return (NULL);
+}
+
+/*
+ * Create or refresh the IPv4 load balancing state that pins the flow
+ * (laddr:lport <-> faddr:fport) to inp.
+ *
+ * The group is found with in_pcblbgroup_lookup() using
+ * INPLOOKUP_WILDCARD; if there is none, nothing happens.  An existing
+ * state only has its timestamp refreshed.  Otherwise a state is
+ * allocated with M_NOWAIT (failure is silently ignored) and inserted
+ * under the group's state lock, after re-checking that no other thread
+ * inserted the same state in the meantime.
+ *
+ * The local side of a new state is taken from the group, not from laddr:
+ * in_pcblbstate_lookup() compares against grp->il_laddr/il_lport, so for
+ * a group bound to INADDR_ANY a state keyed on the packet's concrete
+ * source address would never match again, and every send would insert
+ * another duplicate.
+ *
+ * Must be called inside the network epoch.
+ */
+void
+in_pcblbstate_update(struct inpcb *inp, const struct in_addr *laddr,
+    uint16_t lport, const struct in_addr *faddr, uint16_t fport)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *s;
+	uint32_t hash;
+
+	grp = in_pcblbgroup_lookup(inp->inp_pcbinfo, laddr, lport, faddr,
+	    fport, INPLOOKUP_WILDCARD);
+	if (grp == NULL)
+		return;
+
+	/* Refresh the timestamp if the state already exists. */
+	hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
+	NET_EPOCH_ASSERT();
+	s = in_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (s != NULL) {
+		s->ts = time_uptime;
+		MPASS(s->inp == inp);
+		return;
+	}
+
+	/* No state found. Try to allocate one. */
+	s = malloc(sizeof(*s), M_PCB, M_ZERO | M_NOWAIT);
+	if (s == NULL)
+		return;
+
+	s->ie.ie_faddr.s_addr = faddr->s_addr;
+	s->ie.ie_fport = fport;
+	/* Key the local side on the group so that the lookup can match. */
+	s->ie.ie_laddr.s_addr = grp->il_laddr.s_addr;
+	s->ie.ie_lport = grp->il_lport;
+	s->ts = time_uptime;
+	s->inp = inp;
+
+	mtx_lock(grp->il_lock);
+	if (in_pcblbstate_lookup(grp, hash, faddr, fport) == NULL) {
+		CK_LIST_INSERT_HEAD(&INP_LBSTATE_HASH(grp, hash),
+		    s, grpchain);
+		CK_LIST_INSERT_HEAD(&inp->inp_lbstates, s, pcbchain);
+		inp->inp_lbscnt++;
+		LBDEBUG2("new state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET,
+			&grp->il_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET,
+			faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+	} else {
+		/* Lost the race: the state was never published, free it now. */
+		free(s, M_PCB);
+	}
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Pick the PCB of a load balance group that should receive an IPv4
+ * packet.
+ *
+ * Returns NULL if no group matches.  If a state exists for the foreign
+ * endpoint, the PCB recorded in it is returned.  Otherwise the search
+ * starts from the PCB selected by the packet hash and walks the group
+ * for a member with fewer states, stopping as soon as the current
+ * choice has none.
+ */
+static struct inpcb *
+in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *state;
+	struct inpcb *best;
+	uint32_t hash;
+	int n;
+
+	grp = in_pcblbgroup_lookup(pcbinfo, laddr, lport, faddr,
+	    fport, lookupflags);
+	if (grp == NULL)
+		return (NULL);
+
+	hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
+	state = in_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (state != NULL)
+		return (state->inp);
+
+	/* No sticky state: hash-selected member, unless one is less loaded. */
+	best = grp->il_inp[hash % grp->il_inpcnt];
+	for (n = 0; n < grp->il_inpcnt && best->inp_lbscnt != 0; n++) {
+		if (grp->il_inp[n]->inp_lbscnt < best->inp_lbscnt)
+			best = grp->il_inp[n];
+	}
+	return (best);
+}
#ifdef PCBGROUP
Index: sys/netinet/udp_usrreq.c
===================================================================
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -157,6 +157,26 @@
SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+VNET_DEFINE(int, udp_lbstate_lifetime) = 120;
+
+/*
+ * Sysctl handler for net.inet.udp.lbstate_lifetime.
+ *
+ * The request is served from a local copy, so the per-vnet variable is
+ * only written once a new value has been validated.  Values below 5 are
+ * raised to 5.  Reads do not modify the variable.  Previously the new
+ * value was stored first and clamped afterwards, so lbstate_tick() could
+ * briefly see a too-small (or negative) lifetime.
+ *
+ * Returns 0 on success or the error from sysctl_handle_int().
+ */
+static int
+sysctl_lbstate(SYSCTL_HANDLER_ARGS)
+{
+	int error, lifetime;
+
+	lifetime = V_udp_lbstate_lifetime;
+	error = sysctl_handle_int(oidp, &lifetime, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (lifetime < 5)
+		lifetime = 5;
+	V_udp_lbstate_lifetime = lifetime;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, lbstate_lifetime,
+ CTLFLAG_VNET | CTLTYPE_INT |CTLFLAG_RW,
+ &VNET_NAME(udp_lbstate_lifetime), 0, &sysctl_lbstate, "I",
+ "The life time for load balancing states");
+
#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(udpstat);
#endif /* VIMAGE */
@@ -1522,6 +1542,11 @@
*/
ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */
+
+ /* LB states handling */
+ if (inp_so_options(inp) & SO_REUSEPORT_LB) {
+ in_pcblbstate_update(inp, &laddr, lport, &faddr, fport);
+ }
if (pr == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
Index: sys/netinet6/in6_pcb.h
===================================================================
--- sys/netinet6/in6_pcb.h
+++ sys/netinet6/in6_pcb.h
@@ -107,6 +107,8 @@
void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *,
u_int, const struct sockaddr *, u_int, int, void *,
struct inpcb *(*)(struct inpcb *, int));
+void in6_pcblbstate_update(struct inpcb *, const struct in6_addr *,
+ uint16_t, const struct in6_addr *, uint16_t);
struct inpcb *
in6_rtchange(struct inpcb *, int);
struct sockaddr *
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -113,6 +113,24 @@
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
+#if 0 /* change to 1 to compile in printf tracing of load-balancing state events */
+#define LBDEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG1(fmt, ...) do { \
+ char _addr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0) /* LBDEBUG1 declares _addr[] as scratch space for the caller's arguments */
+#define LBDEBUG2(fmt, ...) do { \
+ char _laddr[50], _faddr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0) /* LBDEBUG2 declares _laddr[]/_faddr[], which callers pass to inet_ntop() */
+#else /* tracing disabled: the macros expand to nothing and their arguments are not evaluated */
+#define LBDEBUG(fmt, ...)
+#define LBDEBUG1(fmt, ...)
+#define LBDEBUG2(fmt, ...)
+#endif
+
int
in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred)
@@ -887,31 +905,28 @@
return inp;
}
-static struct inpcb *
-in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
- uint16_t fport, int lookupflags)
+static struct inpcblbgroup *
+in6_pcblbgroup_lookup(const struct inpcbinfo *pcbinfo,
+ const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+ uint16_t fport, int lookupflags)
{
- struct inpcb *local_wild;
const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- uint32_t idx;
+ struct inpcblbgroup *grp, *grp_local_wild;
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
/*
- * Order of socket selection:
+ * Order of group selection:
* 1. non-wild.
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
*
* NOTE:
- * - Load balanced group does not contain jailed sockets.
- * - Load balanced does not contain IPv4 mapped INET6 wild sockets.
+ * - Load balanced group does not contain jailed sockets
+ * - Load balanced group does not contain IPv4 mapped INET6
+ * wild sockets
*/
- local_wild = NULL;
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
+ grp_local_wild = NULL;
CK_LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET
if (!(grp->il_vflag & INP_IPV6))
@@ -920,15 +935,123 @@
if (grp->il_lport != lport)
continue;
- idx = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport,
- fport) % grp->il_inpcnt;
if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr))
- return (grp->il_inp[idx]);
+ return (grp);
+
if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
- (lookupflags & INPLOOKUP_WILDCARD) != 0)
- local_wild = grp->il_inp[idx];
+ (lookupflags & INPLOOKUP_WILDCARD))
+ grp_local_wild = grp;
}
- return (local_wild);
+ return (grp_local_wild);
+}
+
+/*
+ * Search the group's state table for an IPv6 state matching the given
+ * foreign address and port.  The local side of a state is compared
+ * against the group's own il6_laddr/il_lport.
+ *
+ * hash selects the bucket (see INP_LBSTATE_HASH).  Must be called inside
+ * the network epoch.  Returns the matching state, or NULL if there is
+ * none.
+ */
+static struct inpcb_lbstate *
+in6_pcblbstate_lookup(struct inpcblbgroup *grp, uint32_t hash,
+    const struct in6_addr *faddr, uint16_t fport)
+{
+	struct inpcb_lbstate *s;
+
+	NET_EPOCH_ASSERT();
+	CK_LIST_FOREACH(s, &INP_LBSTATE_HASH(grp, hash), grpchain) {
+		if (!IN6_ARE_ADDR_EQUAL(&s->ie.ie6_faddr, faddr) ||
+		    s->ie.ie_fport != fport ||
+		    !IN6_ARE_ADDR_EQUAL(&s->ie.ie6_laddr, &grp->il6_laddr) ||
+		    s->ie.ie_lport != grp->il_lport)
+			continue;
+		/* Ports go through ntohs(), as in the other traces. */
+		LBDEBUG2("matched state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET6,
+			&grp->il6_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET6,
+			faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+		return (s);
+	}
+	return (NULL);
+}
+
+/*
+ * Create or refresh the IPv6 load balancing state that pins the flow
+ * (laddr:lport <-> faddr:fport) to inp.
+ *
+ * The group is found with in6_pcblbgroup_lookup() using
+ * INPLOOKUP_WILDCARD; if there is none, nothing happens.  An existing
+ * state only has its timestamp refreshed.  Otherwise a state is
+ * allocated with M_NOWAIT (failure is silently ignored) and inserted
+ * under the group's state lock, after re-checking that no other thread
+ * inserted the same state in the meantime.
+ *
+ * The local side of a new state is taken from the group, not from laddr:
+ * in6_pcblbstate_lookup() compares against grp->il6_laddr/il_lport, so
+ * for a group bound to the unspecified address a state keyed on the
+ * packet's concrete source address would never match again, and every
+ * send would insert another duplicate.
+ *
+ * NOTE(review): the UDP caller remarks that its addresses carry an
+ * embedded zone id; confirm the receive-side lookup passes faddr in the
+ * same form.
+ */
+void
+in6_pcblbstate_update(struct inpcb *inp, const struct in6_addr *laddr,
+    uint16_t lport, const struct in6_addr *faddr, uint16_t fport)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *s;
+	uint32_t hash;
+
+	grp = in6_pcblbgroup_lookup(inp->inp_pcbinfo, laddr, lport, faddr,
+	    fport, INPLOOKUP_WILDCARD);
+	if (grp == NULL)
+		return;
+
+	/* Refresh the timestamp if the state already exists. */
+	hash = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, fport);
+	s = in6_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (s != NULL) {
+		s->ts = time_uptime;
+		MPASS(s->inp == inp);
+		return;
+	}
+
+	/* No state found. Try to allocate one. */
+	s = malloc(sizeof(*s), M_PCB, M_ZERO | M_NOWAIT);
+	if (s == NULL)
+		return;
+
+	s->ie.ie6_faddr = *faddr;
+	s->ie.ie_fport = fport;
+	/* Key the local side on the group so that the lookup can match. */
+	s->ie.ie6_laddr = grp->il6_laddr;
+	s->ie.ie_lport = grp->il_lport;
+	s->ts = time_uptime;
+	s->inp = inp;
+
+	mtx_lock(grp->il_lock);
+	if (in6_pcblbstate_lookup(grp, hash, faddr, fport) == NULL) {
+		CK_LIST_INSERT_HEAD(&INP_LBSTATE_HASH(grp, hash),
+		    s, grpchain);
+		CK_LIST_INSERT_HEAD(&inp->inp_lbstates, s, pcbchain);
+		inp->inp_lbscnt++;
+		LBDEBUG2("new state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET6,
+			&grp->il6_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET6,
+			faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+	} else {
+		/* Lost the race: the state was never published, free it now. */
+		free(s, M_PCB);
+	}
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Pick the PCB of a load balance group that should receive an IPv6
+ * packet.
+ *
+ * Returns NULL if no group matches.  If a state exists for the foreign
+ * endpoint, the PCB recorded in it is returned.  Otherwise the search
+ * starts from the PCB selected by the packet hash and walks the group
+ * for a member with fewer states, stopping as soon as the current
+ * choice has none.
+ */
+static struct inpcb *
+in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *state;
+	struct inpcb *best;
+	uint32_t hash;
+	int n;
+
+	grp = in6_pcblbgroup_lookup(pcbinfo, laddr, lport, faddr,
+	    fport, lookupflags);
+	if (grp == NULL)
+		return (NULL);
+
+	hash = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, fport);
+	state = in6_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (state != NULL)
+		return (state->inp);
+
+	/* No sticky state: hash-selected member, unless one is less loaded. */
+	best = grp->il_inp[hash % grp->il_inpcnt];
+	for (n = 0; n < grp->il_inpcnt && best->inp_lbscnt != 0; n++) {
+		if (grp->il_inp[n]->inp_lbscnt < best->inp_lbscnt)
+			best = grp->il_inp[n];
+	}
+	return (best);
+}
#ifdef PCBGROUP
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -992,6 +992,12 @@
}
#endif
+ /* LB states handling */
+ /* XXXAE: addresses have embedded zone id */
+ if (inp_so_options(inp) & SO_REUSEPORT_LB)
+ in6_pcblbstate_update(inp, laddr, inp->inp_lport,
+ faddr, fport);
+
UDPSTAT_INC(udps_opackets);
if (nxt == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Apr 25, 3:29 PM (17 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17771982
Default Alt Text
D26672.diff (21 KB)
Attached To
Mode
D26672: [RFC] Implement sticky SO_REUSEPORT_LB socket option
Attached
Detach File
Event Timeline
Log In to Comment