Page MenuHomeFreeBSD

D26672.diff
No OneTemporary

D26672.diff

Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -129,6 +129,24 @@
#define inc6_zoneid inc_ie.ie6_zoneid
#if defined(_KERNEL) || defined(_WANT_INPCB)
+
+/*
+ * Geometry of the load-balance state hash table.  A load balance group's
+ * il_htbl has LBSTATE_HASHSIZE buckets; INP_LBSTATE_HASH() selects the
+ * bucket for packet hash h by masking with LBSTATE_HASHMASK, so
+ * LBSTATE_HASHSIZE has to remain a power of two.
+ */
+#define LBSTATE_HASHSIZE 32
+#define LBSTATE_HASHMASK (LBSTATE_HASHSIZE - 1)
+#define INP_LBSTATE_HASH(g, h) ((g)->il_htbl[(h) & LBSTATE_HASHMASK])
+
+/*
+ * Per-VNET lifetime of a load-balance state.  The expiry callout compares
+ * it against the time_uptime delta since the state was last updated.
+ */
+VNET_DECLARE(int, udp_lbstate_lifetime);
+#define V_udp_lbstate_lifetime VNET(udp_lbstate_lifetime)
+
+/*
+ * Load-balance state: pins one remote endpoint to one PCB of a load
+ * balance group.  Every state is linked on two lists at once: the
+ * owning PCB's inp_lbstates list and one bucket of the group's il_htbl.
+ */
+struct inpcb_lbstate {
+ CK_LIST_ENTRY(inpcb_lbstate) pcbchain; /* link on inp->inp_lbstates */
+ CK_LIST_ENTRY(inpcb_lbstate) grpchain; /* link in group il_htbl bucket */
+ struct in_endpoints ie; /* addresses/ports matched by lookups */
+ uint32_t ts; /* time_uptime of the last update */
+ struct inpcb *inp; /* PCB this endpoint is pinned to */
+ struct epoch_context epoch_ctx; /* for deferred free via NET_EPOCH_CALL */
+};
+/* List head type used for both inp_lbstates and the il_htbl buckets. */
+CK_LIST_HEAD(inpcb_lbstatehead, inpcb_lbstate);
+
/*
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
@@ -294,6 +312,9 @@
struct label *inp_label; /* (i) MAC label */
struct inpcbpolicy *inp_sp; /* (s) for IPSEC */
+ struct inpcb_lbstatehead inp_lbstates; /* LB states chain */
+ uint32_t inp_lbscnt; /* LB states count */
+
/* Protocol-dependent part; options. */
struct {
u_char inp_ip_tos; /* (i) type of service proto */
@@ -572,6 +593,16 @@
#define il6_laddr il_dependladdr.id6_addr
uint32_t il_inpsiz; /* max count in il_inp[] (h) */
uint32_t il_inpcnt; /* cur count in il_inp[] (h) */
+ struct inpcbinfo *il_pcbinfo;
+
+ /*
+ * The callout, lock and hash table are allocated only when a new
+ * load balance group is created; a resized group inherits them
+ * from the group it replaces.
+ */
+ struct callout *il_callout;
+ struct mtx *il_lock;
+ struct inpcb_lbstatehead *il_htbl;
+
struct inpcb *il_inp[]; /* (h) */
};
@@ -821,6 +852,8 @@
void in_pcbgroup_update(struct inpcb *);
void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
+void in_pcblbstate_update(struct inpcb *, const struct in_addr *, uint16_t,
+ const struct in_addr *, uint16_t);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -115,6 +115,24 @@
static struct callout ipport_tick_callout;
+/*
+ * Debug tracing for load-balance states.  Compiled out by default; flip
+ * the "#if 0" to enable.  Each macro prints the calling function's name
+ * followed by the formatted message.  LBDEBUG1 and LBDEBUG2 also declare
+ * scratch buffers (_addr, or _laddr and _faddr) that the arguments may
+ * reference, e.g. as inet_ntop() output buffers.
+ */
+#if 0
+#define LBDEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG1(fmt, ...) do { \
+ char _addr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG2(fmt, ...) do { \
+ char _laddr[50], _faddr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#else
+/* Tracing disabled: the macros expand to nothing. */
+#define LBDEBUG(fmt, ...)
+#define LBDEBUG1(fmt, ...)
+#define LBDEBUG2(fmt, ...)
+#endif
+
/*
* These configure the range of local port addresses assigned to
* "unspecified" outgoing connections/packets/whatever.
@@ -145,6 +163,7 @@
#define V_ipport_tcplastcount VNET(ipport_tcplastcount)
static void in_pcbremlists(struct inpcb *inp);
+static void in_pcblbstate_free(epoch_context_t ctx);
#ifdef INET
static struct inpcb *in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
struct in_addr faddr, u_int fport_arg,
@@ -265,6 +284,53 @@
}
static void
+lbstate_tick(void *arg)
+{
+	/*
+	 * Periodic expiry handler for a load balance group (passed as arg).
+	 * Walks every hash bucket, drops the states that have not been
+	 * updated for at least V_udp_lbstate_lifetime, and re-arms itself
+	 * to run again in hz ticks.
+	 *
+	 * NOTE(review): the callout is set up with callout_init_mtx() on
+	 * il_lock, so this presumably runs with il_lock held -- confirm
+	 * against callout(9).
+	 */
+	struct inpcblbgroup *grp = arg;
+	struct inpcb_lbstate *st, *next;
+	int bucket;
+
+	CURVNET_SET(grp->il_pcbinfo->ipi_vnet);
+	for (bucket = 0; bucket < LBSTATE_HASHSIZE; bucket++) {
+		CK_LIST_FOREACH_SAFE(st, &grp->il_htbl[bucket], grpchain,
+		    next) {
+			if (time_uptime - st->ts >= V_udp_lbstate_lifetime) {
+#if 0
+				switch (INP_SOCKAF(st->inp->inp_socket)) {
+				case AF_INET:
+					LBDEBUG2("expired state: %s:%u -> %s:%u",
+					    inet_ntop(AF_INET,
+					    &st->ie.ie_laddr, _laddr, sizeof(_laddr)),
+					    ntohs(st->ie.ie_lport),
+					    inet_ntop(AF_INET,
+					    &st->ie.ie_faddr, _faddr, sizeof(_faddr)),
+					    ntohs(st->ie.ie_fport));
+					break;
+				case AF_INET6:
+					LBDEBUG2("expired state: %s:%u -> %s:%u",
+					    inet_ntop(AF_INET6,
+					    &st->ie.ie6_laddr, _laddr, sizeof(_laddr)),
+					    ntohs(st->ie.ie_lport),
+					    inet_ntop(AF_INET6,
+					    &st->ie.ie6_faddr, _faddr, sizeof(_faddr)),
+					    ntohs(st->ie.ie_fport));
+					break;
+				default:
+					LBDEBUG("expired state");
+				}
+#endif
+				/* Unlink from both lists; free after the epoch. */
+				CK_LIST_REMOVE(st, grpchain);
+				CK_LIST_REMOVE(st, pcbchain);
+				st->inp->inp_lbscnt--;
+				NET_EPOCH_CALL(in_pcblbstate_free,
+				    &st->epoch_ctx);
+			}
+		}
+	}
+	CURVNET_RESTORE();
+	callout_reset(grp->il_callout, hz, lbstate_tick, grp);
+}
+
+static void
in_pcblbgroup_free_deferred(epoch_context_t ctx)
{
struct inpcblbgroup *grp;
@@ -281,7 +347,46 @@
NET_EPOCH_CALL(in_pcblbgroup_free_deferred, &grp->il_epoch_ctx);
}
+
static struct inpcblbgroup *
+in_pcblbgroup_alloc0(struct inpcblbgrouphead *hdr, u_char vflag,
+    uint16_t port, const union in_dependaddr *addr, int size)
+{
+	/*
+	 * Create a brand new load balance group together with its state
+	 * table resources: the mutex, the expiry callout and the hash
+	 * table.  All allocations are M_NOWAIT; if any of them fails,
+	 * everything obtained so far is released and NULL is returned.
+	 * On success the expiry callout is already armed.
+	 *
+	 * il_pcbinfo is not set here; the caller assigns it after return.
+	 */
+	struct inpcblbgroup *newgrp;
+
+	newgrp = in_pcblbgroup_alloc(hdr, vflag, port, addr, size);
+	if (newgrp == NULL)
+		return (NULL);
+
+	newgrp->il_lock = malloc(sizeof(*newgrp->il_lock), M_PCB, M_NOWAIT);
+	if (newgrp->il_lock == NULL)
+		goto fail_group;
+
+	newgrp->il_callout = malloc(sizeof(*newgrp->il_callout), M_PCB,
+	    M_NOWAIT);
+	if (newgrp->il_callout == NULL)
+		goto fail_lock;
+
+	newgrp->il_htbl = malloc(sizeof(*newgrp->il_htbl) * LBSTATE_HASHSIZE,
+	    M_PCB, M_NOWAIT | M_ZERO);
+	if (newgrp->il_htbl == NULL)
+		goto fail_callout;
+
+	/* The callout is bound to the mutex, so arm it with the lock held. */
+	mtx_init(newgrp->il_lock, "inplbhtbl", NULL, MTX_DEF | MTX_NEW);
+	callout_init_mtx(newgrp->il_callout, newgrp->il_lock, 0);
+	mtx_lock(newgrp->il_lock);
+	callout_reset(newgrp->il_callout, hz, lbstate_tick, newgrp);
+	mtx_unlock(newgrp->il_lock);
+	LBDEBUG("new lbgroup for port %u", ntohs(port));
+	return (newgrp);
+
+fail_callout:
+	free(newgrp->il_callout, M_PCB);
+fail_lock:
+	free(newgrp->il_lock, M_PCB);
+fail_group:
+	in_pcblbgroup_free(newgrp);
+	return (NULL);
+}
+
+static struct inpcblbgroup *
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr,
struct inpcblbgroup *old_grp, int size)
{
@@ -299,7 +404,17 @@
for (i = 0; i < old_grp->il_inpcnt; ++i)
grp->il_inp[i] = old_grp->il_inp[i];
+ grp->il_pcbinfo = old_grp->il_pcbinfo;
grp->il_inpcnt = old_grp->il_inpcnt;
+ /* Inherit lock and htbl from old group */
+ grp->il_lock = old_grp->il_lock;
+ grp->il_htbl = old_grp->il_htbl;
+ grp->il_callout = old_grp->il_callout;
+
+ /* Reschedule callout with new group */
+ mtx_lock(grp->il_lock);
+ callout_reset(grp->il_callout, hz, lbstate_tick, grp);
+ mtx_unlock(grp->il_lock);
in_pcblbgroup_free(old_grp);
return (grp);
}
@@ -375,11 +490,12 @@
}
if (grp == NULL) {
/* Create new load balance group. */
- grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag,
+ grp = in_pcblbgroup_alloc0(hdr, inp->inp_vflag,
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
INPCBLBGROUP_SIZMIN);
if (grp == NULL)
return (ENOBUFS);
+ grp->il_pcbinfo = pcbinfo;
} else if (grp->il_inpcnt == grp->il_inpsiz) {
if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) {
if (ratecheck(&lastprint, &interval))
@@ -403,7 +519,57 @@
return (0);
}
+/*
+ * Deferred destructor for a load-balance state, invoked through
+ * NET_EPOCH_CALL() with the epoch context embedded in the state.
+ */
+static void
+in_pcblbstate_free(epoch_context_t ctx)
+{
+
+	free(__containerof(ctx, struct inpcb_lbstate, epoch_ctx), M_PCB);
+}
+
/*
+ * Free all LB states related to a PCB.
+ *
+ * Under the group's il_lock, every state on inp->inp_lbstates is unlinked
+ * from both the group's hash table and the PCB's list, and its memory is
+ * released once the current network epoch has ended.  The PCB's state
+ * counter is reset as well, so that it stays in step with the (now
+ * empty) list, just as the expiry path keeps it in step by decrementing.
+ */
+static void
+in_pcblbstates_destroy(struct inpcblbgroup *grp, struct inpcb *inp)
+{
+	struct inpcb_lbstate *s;
+
+	mtx_lock(grp->il_lock);
+	while (!CK_LIST_EMPTY(&inp->inp_lbstates)) {
+		s = CK_LIST_FIRST(&inp->inp_lbstates);
+		CK_LIST_REMOVE(s, grpchain);
+		CK_LIST_REMOVE(s, pcbchain);
+		NET_EPOCH_CALL(in_pcblbstate_free, &s->epoch_ctx);
+	}
+	/* No states are left on this PCB. */
+	inp->inp_lbscnt = 0;
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Tear down the state table resources of a load balance group: stop the
+ * expiry callout, drop every remaining state, then destroy and free the
+ * mutex, the callout and the hash table.
+ *
+ * Each dropped state also decrements its PCB's state counter, matching
+ * what the expiry callout does, so the counter never outlives the states
+ * it counts.
+ *
+ * NOTE(review): il_htbl is freed immediately, while the lookup paths walk
+ * it under the network epoch without taking il_lock; confirm that no
+ * reader can still be traversing the table at this point.
+ */
+static void
+in_pcblbhtbl_free(struct inpcblbgroup *grp)
+{
+	struct inpcb_lbstate *s;
+	int i;
+
+	mtx_lock(grp->il_lock);
+	callout_stop(grp->il_callout);
+	for (i = 0; i < LBSTATE_HASHSIZE; i++) {
+		while (!CK_LIST_EMPTY(&grp->il_htbl[i])) {
+			s = CK_LIST_FIRST(&grp->il_htbl[i]);
+			CK_LIST_REMOVE(s, grpchain);
+			CK_LIST_REMOVE(s, pcbchain);
+			s->inp->inp_lbscnt--;
+			NET_EPOCH_CALL(in_pcblbstate_free, &s->epoch_ctx);
+		}
+	}
+	mtx_unlock(grp->il_lock);
+	mtx_destroy(grp->il_lock);
+	free(grp->il_callout, M_PCB);
+	free(grp->il_lock, M_PCB);
+	free(grp->il_htbl, M_PCB);
+}
+
+/*
* Remove PCB from load balance group.
*/
static void
@@ -428,10 +594,12 @@
if (grp->il_inpcnt == 1) {
/* We are the last, free this local group. */
+ in_pcblbhtbl_free(grp);
in_pcblbgroup_free(grp);
} else {
/* Pull up inpcbs, shrink group if possible. */
in_pcblbgroup_reorder(hdr, &grp, i);
+ in_pcblbstates_destroy(grp, inp);
}
return;
}
@@ -2005,31 +2173,28 @@
}
#undef INP_LOOKUP_MAPPED_PCB_COST
-static struct inpcb *
-in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
- uint16_t fport, int lookupflags)
+static struct inpcblbgroup *
+in_pcblbgroup_lookup(const struct inpcbinfo *pcbinfo,
+ const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+ uint16_t fport, int lookupflags)
{
- struct inpcb *local_wild;
const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- uint32_t idx;
+ struct inpcblbgroup *grp, *grp_local_wild;
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
/*
- * Order of socket selection:
+ * Order of group selection:
* 1. non-wild.
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
*
* NOTE:
* - Load balanced group does not contain jailed sockets
- * - Load balanced group does not contain IPv4 mapped INET6 wild sockets
+ * - Load balanced group does not contain IPv4 mapped INET6
+ * wild sockets
*/
- local_wild = NULL;
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
+ grp_local_wild = NULL;
CK_LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET6
if (!(grp->il_vflag & INP_IPV4))
@@ -2038,15 +2203,123 @@
if (grp->il_lport != lport)
continue;
- idx = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport) %
- grp->il_inpcnt;
if (grp->il_laddr.s_addr == laddr->s_addr)
- return (grp->il_inp[idx]);
+ return (grp);
+
if (grp->il_laddr.s_addr == INADDR_ANY &&
- (lookupflags & INPLOOKUP_WILDCARD) != 0)
- local_wild = grp->il_inp[idx];
+ (lookupflags & INPLOOKUP_WILDCARD))
+ grp_local_wild = grp;
}
- return (local_wild);
+ return (grp_local_wild);
+}
+
+/*
+ * Find the load-balance state for remote endpoint faddr:fport in the
+ * group's hash table.
+ *
+ * hash is the packet hash used to pick the bucket.  A state matches when
+ * its foreign endpoint equals faddr:fport and its local endpoint equals
+ * the group's il_laddr:il_lport.  Returns the matching state, or NULL if
+ * the bucket holds none.  Must be called inside the network epoch.
+ */
+static struct inpcb_lbstate *
+in_pcblbstate_lookup(struct inpcblbgroup *grp, uint32_t hash,
+    const struct in_addr *faddr, uint16_t fport)
+{
+	struct inpcb_lbstate *s;
+
+	NET_EPOCH_ASSERT();
+	CK_LIST_FOREACH(s, &INP_LBSTATE_HASH(grp, hash), grpchain) {
+		if (s->ie.ie_faddr.s_addr == faddr->s_addr &&
+		    s->ie.ie_fport == fport &&
+		    s->ie.ie_laddr.s_addr == grp->il_laddr.s_addr &&
+		    s->ie.ie_lport == grp->il_lport) {
+			/* Ports are kept in network order; convert to print. */
+			LBDEBUG2("matched state: %s:%u -> %s:%u",
+			    inet_ntop(AF_INET,
+			    &grp->il_laddr, _laddr, sizeof(_laddr)),
+			    ntohs(grp->il_lport),
+			    inet_ntop(AF_INET,
+			    faddr, _faddr, sizeof(_faddr)),
+			    ntohs(fport));
+			break;
+		}
+	}
+	/* s is NULL here when the loop ran off the end of the bucket. */
+	return (s);
+}
+
+/*
+ * Record, or refresh, the load-balance state that pins remote endpoint
+ * faddr:fport to inp.
+ *
+ * laddr:lport is the local endpoint of the datagram and is used only to
+ * find the load balance group (wildcard groups are accepted).  If a state
+ * for the remote endpoint already exists its timestamp is refreshed;
+ * otherwise a new one is allocated (M_NOWAIT, allocation failure is
+ * silently ignored) and inserted under the group's il_lock.
+ *
+ * The state is keyed by the group's local endpoint, not by laddr, because
+ * in_pcblbstate_lookup() compares against grp->il_laddr.  For a wildcard
+ * group laddr may already be a concrete source address; storing it would
+ * make the state unmatchable, and a duplicate would be allocated on every
+ * call.
+ */
+void
+in_pcblbstate_update(struct inpcb *inp, const struct in_addr *laddr,
+    uint16_t lport, const struct in_addr *faddr, uint16_t fport)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *s;
+	uint32_t hash;
+
+	grp = in_pcblbgroup_lookup(inp->inp_pcbinfo, laddr, lport, faddr,
+	    fport, INPLOOKUP_WILDCARD);
+	if (grp == NULL)
+		return;
+	/* Update timestamp if state is existing */
+	hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
+	NET_EPOCH_ASSERT();
+	s = in_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (s != NULL) {
+		s->ts = time_uptime;
+		MPASS(s->inp == inp);
+		return;
+	}
+
+	/* No state found. Try to allocate one. */
+	s = malloc(sizeof(*s), M_PCB, M_ZERO | M_NOWAIT);
+	if (s == NULL)
+		return;
+
+	/* Use the group's local endpoint so that lookups can match it. */
+	s->ie.ie_faddr.s_addr = faddr->s_addr;
+	s->ie.ie_laddr.s_addr = grp->il_laddr.s_addr;
+	s->ie.ie_fport = fport;
+	s->ie.ie_lport = grp->il_lport;
+	s->ts = time_uptime;
+	s->inp = inp;
+
+	/* Re-check under the lock: another thread may have inserted it. */
+	mtx_lock(grp->il_lock);
+	if (in_pcblbstate_lookup(grp, hash, faddr, fport) == NULL) {
+		CK_LIST_INSERT_HEAD(&INP_LBSTATE_HASH(grp, hash),
+		    s, grpchain);
+		CK_LIST_INSERT_HEAD(&inp->inp_lbstates, s, pcbchain);
+		inp->inp_lbscnt++;
+		LBDEBUG2("new state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET,
+		    &grp->il_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET,
+		    faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+	} else {
+		free(s, M_PCB);
+	}
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Pick the PCB of a load balance group that should receive a datagram
+ * from faddr:fport addressed to laddr:lport.
+ *
+ * If the remote endpoint already has a load-balance state, the PCB
+ * recorded in it is returned.  Otherwise selection starts from the PCB
+ * chosen by the packet hash and scans the group for a member with fewer
+ * states, stopping as soon as the current candidate has none.  Returns
+ * NULL when no matching group exists.
+ */
+static struct inpcb *
+in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *state;
+	struct inpcb *best;
+	uint32_t hash;
+	int idx;
+
+	grp = in_pcblbgroup_lookup(pcbinfo, laddr, lport, faddr,
+	    fport, lookupflags);
+	if (grp == NULL)
+		return (NULL);
+
+	hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, lport, fport);
+	state = in_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (state != NULL)
+		return (state->inp);
+
+	/* No state yet: hash-selected PCB unless a less loaded one exists. */
+	best = grp->il_inp[hash % grp->il_inpcnt];
+	for (idx = 0; idx < grp->il_inpcnt && best->inp_lbscnt != 0; idx++) {
+		if (grp->il_inp[idx]->inp_lbscnt < best->inp_lbscnt)
+			best = grp->il_inp[idx];
+	}
+	return (best);
}
#ifdef PCBGROUP
Index: sys/netinet/udp_usrreq.c
===================================================================
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -157,6 +157,26 @@
SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+VNET_DEFINE(int, udp_lbstate_lifetime) = 120;
+
+/*
+ * Sysctl handler for the load-balance state lifetime.
+ *
+ * The new value is read into a local copy and clamped to a minimum of 5
+ * before it is stored.  The live variable therefore never holds an
+ * out-of-range value, not even briefly; a zero or negative lifetime would
+ * make the expiry callout drop every state.  Read-only requests leave the
+ * variable untouched.  Returns the error from sysctl_handle_int(), or 0.
+ */
+static int
+sysctl_lbstate(SYSCTL_HANDLER_ARGS)
+{
+	int error, val;
+
+	val = *(int *)arg1;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (val < 5)
+		val = 5;
+	*(int *)arg1 = val;
+	return (0);
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, lbstate_lifetime,
+ CTLFLAG_VNET | CTLTYPE_INT |CTLFLAG_RW,
+ &VNET_NAME(udp_lbstate_lifetime), 0, &sysctl_lbstate, "I",
+ "The life time for load balancing states");
+
#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(udpstat);
#endif /* VIMAGE */
@@ -1522,6 +1542,11 @@
*/
ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */
+
+ /* LB states handling */
+ if (inp_so_options(inp) & SO_REUSEPORT_LB) {
+ in_pcblbstate_update(inp, &laddr, lport, &faddr, fport);
+ }
if (pr == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
Index: sys/netinet6/in6_pcb.h
===================================================================
--- sys/netinet6/in6_pcb.h
+++ sys/netinet6/in6_pcb.h
@@ -107,6 +107,8 @@
void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *,
u_int, const struct sockaddr *, u_int, int, void *,
struct inpcb *(*)(struct inpcb *, int));
+void in6_pcblbstate_update(struct inpcb *, const struct in6_addr *,
+ uint16_t, const struct in6_addr *, uint16_t);
struct inpcb *
in6_rtchange(struct inpcb *, int);
struct sockaddr *
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -113,6 +113,24 @@
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
+/*
+ * Debug tracing for load-balance states.  Compiled out by default; flip
+ * the "#if 0" to enable.  Each macro prints the calling function's name
+ * followed by the formatted message.  LBDEBUG1 and LBDEBUG2 also declare
+ * scratch buffers (_addr, or _laddr and _faddr) that the arguments may
+ * reference, e.g. as inet_ntop() output buffers.
+ */
+#if 0
+#define LBDEBUG(fmt, ...) do { \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG1(fmt, ...) do { \
+ char _addr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#define LBDEBUG2(fmt, ...) do { \
+ char _laddr[50], _faddr[50]; \
+ printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+#else
+/* Tracing disabled: the macros expand to nothing. */
+#define LBDEBUG(fmt, ...)
+#define LBDEBUG1(fmt, ...)
+#define LBDEBUG2(fmt, ...)
+#endif
+
int
in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred)
@@ -887,31 +905,28 @@
return inp;
}
-static struct inpcb *
-in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
- const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
- uint16_t fport, int lookupflags)
+static struct inpcblbgroup *
+in6_pcblbgroup_lookup(const struct inpcbinfo *pcbinfo,
+ const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+ uint16_t fport, int lookupflags)
{
- struct inpcb *local_wild;
const struct inpcblbgrouphead *hdr;
- struct inpcblbgroup *grp;
- uint32_t idx;
+ struct inpcblbgroup *grp, *grp_local_wild;
- INP_HASH_LOCK_ASSERT(pcbinfo);
-
- hdr = &pcbinfo->ipi_lbgrouphashbase[
- INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
-
/*
- * Order of socket selection:
+ * Order of group selection:
* 1. non-wild.
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
*
* NOTE:
- * - Load balanced group does not contain jailed sockets.
- * - Load balanced does not contain IPv4 mapped INET6 wild sockets.
+ * - Load balanced group does not contain jailed sockets
+ * - Load balanced group does not contain IPv4 mapped INET6
+ * wild sockets
*/
- local_wild = NULL;
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+ hdr = &pcbinfo->ipi_lbgrouphashbase[
+ INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
+ grp_local_wild = NULL;
CK_LIST_FOREACH(grp, hdr, il_list) {
#ifdef INET
if (!(grp->il_vflag & INP_IPV6))
@@ -920,15 +935,123 @@
if (grp->il_lport != lport)
continue;
- idx = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport,
- fport) % grp->il_inpcnt;
if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr))
- return (grp->il_inp[idx]);
+ return (grp);
+
if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
- (lookupflags & INPLOOKUP_WILDCARD) != 0)
- local_wild = grp->il_inp[idx];
+ (lookupflags & INPLOOKUP_WILDCARD))
+ grp_local_wild = grp;
}
- return (local_wild);
+ return (grp_local_wild);
+}
+
+/*
+ * Find the load-balance state for IPv6 remote endpoint faddr:fport in the
+ * group's hash table.
+ *
+ * hash is the packet hash used to pick the bucket.  A state matches when
+ * its foreign endpoint equals faddr:fport and its local endpoint equals
+ * the group's il6_laddr:il_lport.  Returns the matching state, or NULL if
+ * the bucket holds none.  Must be called inside the network epoch.
+ */
+static struct inpcb_lbstate *
+in6_pcblbstate_lookup(struct inpcblbgroup *grp, uint32_t hash,
+    const struct in6_addr *faddr, uint16_t fport)
+{
+	struct inpcb_lbstate *s;
+
+	NET_EPOCH_ASSERT();
+	CK_LIST_FOREACH(s, &INP_LBSTATE_HASH(grp, hash), grpchain) {
+		if (IN6_ARE_ADDR_EQUAL(&s->ie.ie6_faddr, faddr) &&
+		    s->ie.ie_fport == fport &&
+		    IN6_ARE_ADDR_EQUAL(&s->ie.ie6_laddr, &grp->il6_laddr) &&
+		    s->ie.ie_lport == grp->il_lport) {
+			/* Ports are kept in network order; convert to print. */
+			LBDEBUG2("matched state: %s:%u -> %s:%u",
+			    inet_ntop(AF_INET6,
+			    &grp->il6_laddr, _laddr, sizeof(_laddr)),
+			    ntohs(grp->il_lport),
+			    inet_ntop(AF_INET6,
+			    faddr, _faddr, sizeof(_faddr)),
+			    ntohs(fport));
+			break;
+		}
+	}
+	/* s is NULL here when the loop ran off the end of the bucket. */
+	return (s);
+}
+
+/*
+ * Record, or refresh, the load-balance state that pins IPv6 remote
+ * endpoint faddr:fport to inp.
+ *
+ * laddr:lport is the local endpoint of the datagram and is used only to
+ * find the load balance group (wildcard groups are accepted).  If a state
+ * for the remote endpoint already exists its timestamp is refreshed;
+ * otherwise a new one is allocated (M_NOWAIT, allocation failure is
+ * silently ignored) and inserted under the group's il_lock.
+ *
+ * The state is keyed by the group's local endpoint, not by laddr, because
+ * in6_pcblbstate_lookup() compares against grp->il6_laddr.  For a
+ * wildcard group laddr may already be a concrete source address; storing
+ * it would make the state unmatchable, and a duplicate would be allocated
+ * on every call.
+ */
+void
+in6_pcblbstate_update(struct inpcb *inp, const struct in6_addr *laddr,
+    uint16_t lport, const struct in6_addr *faddr, uint16_t fport)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *s;
+	uint32_t hash;
+
+	grp = in6_pcblbgroup_lookup(inp->inp_pcbinfo, laddr, lport, faddr,
+	    fport, INPLOOKUP_WILDCARD);
+	if (grp == NULL)
+		return;
+
+	/* Update timestamp if state is existing */
+	hash = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, fport);
+	s = in6_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (s != NULL) {
+		s->ts = time_uptime;
+		MPASS(s->inp == inp);
+		return;
+	}
+
+	/* No state found. Try to allocate one. */
+	s = malloc(sizeof(*s), M_PCB, M_ZERO | M_NOWAIT);
+	if (s == NULL)
+		return;
+
+	/* Use the group's local endpoint so that lookups can match it. */
+	s->ie.ie6_faddr = *faddr;
+	s->ie.ie6_laddr = grp->il6_laddr;
+	s->ie.ie_fport = fport;
+	s->ie.ie_lport = grp->il_lport;
+	s->ts = time_uptime;
+	s->inp = inp;
+
+	/* Re-check under the lock: another thread may have inserted it. */
+	mtx_lock(grp->il_lock);
+	if (in6_pcblbstate_lookup(grp, hash, faddr, fport) == NULL) {
+		CK_LIST_INSERT_HEAD(&INP_LBSTATE_HASH(grp, hash),
+		    s, grpchain);
+		CK_LIST_INSERT_HEAD(&inp->inp_lbstates, s, pcbchain);
+		inp->inp_lbscnt++;
+		LBDEBUG2("new state: %s:%u -> %s:%u",
+		    inet_ntop(AF_INET6,
+		    &grp->il6_laddr, _laddr, sizeof(_laddr)),
+		    ntohs(grp->il_lport),
+		    inet_ntop(AF_INET6,
+		    faddr, _faddr, sizeof(_faddr)),
+		    ntohs(fport));
+	} else {
+		free(s, M_PCB);
+	}
+	mtx_unlock(grp->il_lock);
+}
+
+/*
+ * Pick the PCB of a load balance group that should receive an IPv6
+ * datagram from faddr:fport addressed to laddr:lport.
+ *
+ * If the remote endpoint already has a load-balance state, the PCB
+ * recorded in it is returned.  Otherwise selection starts from the PCB
+ * chosen by the packet hash and scans the group for a member with fewer
+ * states, stopping as soon as the current candidate has none.  Returns
+ * NULL when no matching group exists.
+ */
+static struct inpcb *
+in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+    const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+    uint16_t fport, int lookupflags)
+{
+	struct inpcblbgroup *grp;
+	struct inpcb_lbstate *state;
+	struct inpcb *best;
+	uint32_t hash;
+	int idx;
+
+	grp = in6_pcblbgroup_lookup(pcbinfo, laddr, lport, faddr,
+	    fport, lookupflags);
+	if (grp == NULL)
+		return (NULL);
+
+	hash = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, fport);
+	state = in6_pcblbstate_lookup(grp, hash, faddr, fport);
+	if (state != NULL)
+		return (state->inp);
+
+	/* No state yet: hash-selected PCB unless a less loaded one exists. */
+	best = grp->il_inp[hash % grp->il_inpcnt];
+	for (idx = 0; idx < grp->il_inpcnt && best->inp_lbscnt != 0; idx++) {
+		if (grp->il_inp[idx]->inp_lbscnt < best->inp_lbscnt)
+			best = grp->il_inp[idx];
+	}
+	return (best);
}
#ifdef PCBGROUP
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -992,6 +992,12 @@
}
#endif
+ /* LB states handling */
+ /* XXXAE: addresses have embedded zone id */
+ if (inp_so_options(inp) & SO_REUSEPORT_LB)
+ in6_pcblbstate_update(inp, laddr, inp->inp_lport,
+ faddr, fport);
+
UDPSTAT_INC(udps_opackets);
if (nxt == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 25, 3:29 PM (17 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17771982
Default Alt Text
D26672.diff (21 KB)

Event Timeline