Page MenuHomeFreeBSD

D18373.diff
No OneTemporary

D18373.diff

Index: head/share/man/man4/pfsync.4
===================================================================
--- head/share/man/man4/pfsync.4
+++ head/share/man/man4/pfsync.4
@@ -26,7 +26,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 18, 2017
+.Dd December 6, 2018
.Dt PFSYNC 4
.Os
.Sh NAME
@@ -130,6 +130,13 @@
.Xr carp 4
for more information.
Default value is 240.
+.It Va net.pfsync.pfsync_buckets
+The number of
+.Nm
+buckets.
+This affects the performance and memory tradeoff.
+Defaults to twice the number of CPUs.
+Change only if benchmarks show this helps on your workload.
.El
.Sh EXAMPLES
.Nm
Index: head/sys/netpfil/pf/if_pfsync.c
===================================================================
--- head/sys/netpfil/pf/if_pfsync.c
+++ head/sys/netpfil/pf/if_pfsync.c
@@ -77,6 +77,7 @@
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/protosw.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -106,6 +107,8 @@
sizeof(struct pfsync_header) + \
sizeof(struct pfsync_subheader) )
+struct pfsync_bucket;
+
struct pfsync_pkt {
struct ip *ip;
struct in_addr src;
@@ -164,7 +167,7 @@
};
static void pfsync_q_ins(struct pf_state *, int, bool);
-static void pfsync_q_del(struct pf_state *, bool);
+static void pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *);
static void pfsync_update_state(struct pf_state *);
@@ -183,6 +186,28 @@
struct mbuf *pd_m;
};
+struct pfsync_softc;
+
+struct pfsync_bucket
+{
+ int b_id;
+ struct pfsync_softc *b_sc;
+ struct mtx b_mtx;
+ struct callout b_tmo;
+ int b_flags;
+#define PFSYNCF_BUCKET_PUSH 0x00000001
+
+ size_t b_len;
+ TAILQ_HEAD(, pf_state) b_qs[PFSYNC_S_COUNT];
+ TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list;
+ TAILQ_HEAD(, pfsync_deferral) b_deferrals;
+ u_int b_deferred;
+ void *b_plus;
+ size_t b_pluslen;
+
+ struct ifaltq b_snd;
+};
+
struct pfsync_softc {
/* Configuration */
struct ifnet *sc_ifp;
@@ -192,20 +217,12 @@
uint32_t sc_flags;
#define PFSYNCF_OK 0x00000001
#define PFSYNCF_DEFER 0x00000002
-#define PFSYNCF_PUSH 0x00000004
uint8_t sc_maxupdates;
struct ip sc_template;
- struct callout sc_tmo;
struct mtx sc_mtx;
/* Queued data */
- size_t sc_len;
- TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT];
- TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list;
- TAILQ_HEAD(, pfsync_deferral) sc_deferrals;
- u_int sc_deferred;
- void *sc_plus;
- size_t sc_pluslen;
+ struct pfsync_bucket *sc_buckets;
/* Bulk update info */
struct mtx sc_bulk_mtx;
@@ -223,6 +240,10 @@
#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED)
+
#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx)
#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx)
#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
@@ -239,7 +260,8 @@
#define V_pfsync_carp_adj VNET(pfsync_carp_adj)
static void pfsync_timeout(void *);
-static void pfsync_push(struct pfsync_softc *);
+static void pfsync_push(struct pfsync_bucket *);
+static void pfsync_push_all(struct pfsync_softc *);
static void pfsyncintr(void *);
static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
void *);
@@ -249,12 +271,16 @@
static int pfsync_init(void);
static void pfsync_uninit(void);
+static unsigned long pfsync_buckets;
+
SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(pfsyncstats), pfsyncstats,
"PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
&VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
+SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
+ &pfsync_buckets, 0, "Number of pfsync hash buckets");
static int pfsync_clone_create(struct if_clone *, int, caddr_t);
static void pfsync_clone_destroy(struct ifnet *);
@@ -270,10 +296,10 @@
static void pfsync_defer_tmo(void *);
static void pfsync_request_update(u_int32_t, u_int64_t);
-static void pfsync_update_state_req(struct pf_state *);
+static bool pfsync_update_state_req(struct pf_state *);
static void pfsync_drop(struct pfsync_softc *);
-static void pfsync_sendout(int);
+static void pfsync_sendout(int, int);
static void pfsync_send_plus(void *, size_t);
static void pfsync_bulk_start(void);
@@ -285,7 +311,10 @@
#ifdef IPSEC
static void pfsync_update_net_tdb(struct pfsync_tdb *);
#endif
+static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *,
+ struct pf_state *);
+
#define PFSYNC_MAX_BULKTRIES 12
VNET_DEFINE(struct if_clone *, pfsync_cloner);
@@ -296,21 +325,16 @@
{
struct pfsync_softc *sc;
struct ifnet *ifp;
- int q;
+ struct pfsync_bucket *b;
+ int c, q;
if (unit != 0)
return (EINVAL);
- sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
- sc->sc_flags |= PFSYNCF_OK;
+ if (! pfsync_buckets)
+ pfsync_buckets = mp_ncpus * 2;
- for (q = 0; q < PFSYNC_S_COUNT; q++)
- TAILQ_INIT(&sc->sc_qs[q]);
-
- TAILQ_INIT(&sc->sc_upd_req_list);
- TAILQ_INIT(&sc->sc_deferrals);
-
- sc->sc_len = PFSYNC_MINPKT;
+ sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
sc->sc_maxupdates = 128;
ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
@@ -323,12 +347,10 @@
ifp->if_ioctl = pfsyncioctl;
ifp->if_output = pfsyncoutput;
ifp->if_type = IFT_PFSYNC;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
- callout_init(&sc->sc_tmo, 1);
callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
@@ -336,6 +358,27 @@
bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+ sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
+ M_PFSYNC, M_ZERO | M_WAITOK);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF);
+
+ b->b_id = c;
+ b->b_sc = sc;
+ b->b_len = PFSYNC_MINPKT;
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++)
+ TAILQ_INIT(&b->b_qs[q]);
+
+ TAILQ_INIT(&b->b_upd_req_list);
+ TAILQ_INIT(&b->b_deferrals);
+
+ callout_init(&b->b_tmo, 1);
+
+ b->b_snd.ifq_maxlen = ifqmaxlen;
+ }
+
V_pfsyncif = sc;
return (0);
@@ -345,29 +388,36 @@
pfsync_clone_destroy(struct ifnet *ifp)
{
struct pfsync_softc *sc = ifp->if_softc;
+ struct pfsync_bucket *b;
+ int c;
- /*
- * At this stage, everything should have already been
- * cleared by pfsync_uninit(), and we have only to
- * drain callouts.
- */
- while (sc->sc_deferred > 0) {
- struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ /*
+ * At this stage, everything should have already been
+ * cleared by pfsync_uninit(), and we have only to
+ * drain callouts.
+ */
+ while (b->b_deferred > 0) {
+ struct pfsync_deferral *pd =
+ TAILQ_FIRST(&b->b_deferrals);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
- if (callout_stop(&pd->pd_tmo) > 0) {
- pf_release_state(pd->pd_st);
- m_freem(pd->pd_m);
- free(pd, M_PFSYNC);
- } else {
- pd->pd_refs++;
- callout_drain(&pd->pd_tmo);
- free(pd, M_PFSYNC);
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
+ if (callout_stop(&pd->pd_tmo) > 0) {
+ pf_release_state(pd->pd_st);
+ m_freem(pd->pd_m);
+ free(pd, M_PFSYNC);
+ } else {
+ pd->pd_refs++;
+ callout_drain(&pd->pd_tmo);
+ free(pd, M_PFSYNC);
+ }
}
+
+ callout_drain(&b->b_tmo);
}
- callout_drain(&sc->sc_tmo);
callout_drain(&sc->sc_bulkfail_tmo);
callout_drain(&sc->sc_bulk_tmo);
@@ -383,6 +433,8 @@
pfsync_multicast_cleanup(sc);
mtx_destroy(&sc->sc_mtx);
mtx_destroy(&sc->sc_bulk_mtx);
+
+ free(sc->sc_buckets, M_PFSYNC);
free(sc, M_PFSYNC);
V_pfsyncif = NULL;
@@ -546,7 +598,7 @@
st->state_flags &= ~PFSTATE_NOSYNC;
if (st->state_flags & PFSTATE_ACK) {
pfsync_q_ins(st, PFSYNC_S_IACK, true);
- pfsync_push(sc);
+ pfsync_push_all(sc);
}
}
st->state_flags &= ~PFSTATE_ACK;
@@ -785,9 +837,7 @@
continue;
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(V_pfsyncif);
pfsync_undefer_state(st, 0);
- PFSYNC_UNLOCK(V_pfsyncif);
}
PF_STATE_UNLOCK(st);
}
@@ -876,9 +926,7 @@
}
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(sc);
pfsync_undefer_state(st, 1);
- PFSYNC_UNLOCK(sc);
}
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -912,9 +960,7 @@
pfsync_update_state(st);
PF_STATE_UNLOCK(st);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ pfsync_push_all(sc);
continue;
}
PF_STATE_UNLOCK(st);
@@ -960,16 +1006,14 @@
st = pf_find_state_byid(up->id, up->creatorid);
if (st == NULL) {
/* We don't have this state. Ask for it. */
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
pfsync_request_update(up->creatorid, up->id);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
continue;
}
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(sc);
pfsync_undefer_state(st, 1);
- PFSYNC_UNLOCK(sc);
}
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -1003,9 +1047,7 @@
pfsync_update_state(st);
PF_STATE_UNLOCK(st);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ pfsync_push_all(sc);
continue;
}
PF_STATE_UNLOCK(st);
@@ -1283,6 +1325,7 @@
struct ifreq *ifr = (struct ifreq *)data;
struct pfsyncreq pfsyncr;
int error;
+ int c;
switch (cmd) {
case SIOCSIFFLAGS:
@@ -1303,10 +1346,12 @@
ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
return (EINVAL);
if (ifr->ifr_mtu < ifp->if_mtu) {
- PFSYNC_LOCK(sc);
- if (sc->sc_len > PFSYNC_MINPKT)
- pfsync_sendout(1);
- PFSYNC_UNLOCK(sc);
+ for (c = 0; c < pfsync_buckets; c++) {
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+ if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
+ pfsync_sendout(1, c);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+ }
}
ifp->if_mtu = ifr->ifr_mtu;
break;
@@ -1379,12 +1424,16 @@
break;
}
- if (sc->sc_len > PFSYNC_MINPKT &&
- (sifp->if_mtu < sc->sc_ifp->if_mtu ||
- (sc->sc_sync_if != NULL &&
- sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
- sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
- pfsync_sendout(1);
+ for (c = 0; c < pfsync_buckets; c++) {
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+ if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
+ (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+ (sc->sc_sync_if != NULL &&
+ sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
+ sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
+ pfsync_sendout(1, c);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+ }
if (imo->imo_membership)
pfsync_multicast_cleanup(sc);
@@ -1421,8 +1470,10 @@
sc->sc_flags &= ~PFSYNCF_OK;
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("pfsync: requesting bulk update\n");
- pfsync_request_update(0, 0);
PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
+ pfsync_request_update(0, 0);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
PFSYNC_BLOCK(sc);
sc->sc_ureq_sent = time_uptime;
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
@@ -1483,33 +1534,37 @@
{
struct pf_state *st, *next;
struct pfsync_upd_req_item *ur;
- int q;
+ struct pfsync_bucket *b;
+ int c, q;
- for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- continue;
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
+ if (TAILQ_EMPTY(&b->b_qs[q]))
+ continue;
- TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
- KASSERT(st->sync_state == q,
- ("%s: st->sync_state == q",
- __func__));
- st->sync_state = PFSYNC_S_NONE;
- pf_release_state(st);
+ TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
+ KASSERT(st->sync_state == q,
+ ("%s: st->sync_state == q",
+ __func__));
+ st->sync_state = PFSYNC_S_NONE;
+ pf_release_state(st);
+ }
+ TAILQ_INIT(&b->b_qs[q]);
}
- TAILQ_INIT(&sc->sc_qs[q]);
- }
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
- free(ur, M_PFSYNC);
- }
+ while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
+ free(ur, M_PFSYNC);
+ }
- sc->sc_plus = NULL;
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
+ b->b_plus = NULL;
+ }
}
static void
-pfsync_sendout(int schedswi)
+pfsync_sendout(int schedswi, int c)
{
struct pfsync_softc *sc = V_pfsyncif;
struct ifnet *ifp = sc->sc_ifp;
@@ -1519,27 +1574,28 @@
struct pfsync_subheader *subh;
struct pf_state *st, *st_next;
struct pfsync_upd_req_item *ur;
+ struct pfsync_bucket *b = &sc->sc_buckets[c];
int offset;
int q, count = 0;
KASSERT(sc != NULL, ("%s: null sc", __func__));
- KASSERT(sc->sc_len > PFSYNC_MINPKT,
- ("%s: sc_len %zu", __func__, sc->sc_len));
- PFSYNC_LOCK_ASSERT(sc);
+ KASSERT(b->b_len > PFSYNC_MINPKT,
+ ("%s: sc_len %zu", __func__, b->b_len));
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
pfsync_drop(sc);
return;
}
- m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL) {
if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
V_pfsyncstats.pfsyncs_onomem++;
return;
}
m->m_data += max_linkhdr;
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = b->b_len;
/* build the ip header */
ip = (struct ip *)m->m_data;
@@ -1555,19 +1611,19 @@
offset += sizeof(*ph);
ph->version = PFSYNC_VERSION;
- ph->len = htons(sc->sc_len - sizeof(*ip));
+ ph->len = htons(b->b_len - sizeof(*ip));
bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
/* walk the queues */
for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&b->b_qs[q]))
continue;
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) {
+ TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
KASSERT(st->sync_state == q,
("%s: st->sync_state == q",
__func__));
@@ -1581,7 +1637,7 @@
pf_release_state(st);
count++;
}
- TAILQ_INIT(&sc->sc_qs[q]);
+ TAILQ_INIT(&b->b_qs[q]);
bzero(subh, sizeof(*subh));
subh->action = pfsync_qs[q].action;
@@ -1589,13 +1645,13 @@
V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
}
- if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+ if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+ while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
bcopy(&ur->ur_msg, m->m_data + offset,
sizeof(ur->ur_msg));
@@ -1611,11 +1667,11 @@
}
/* has someone built a custom region for us to add? */
- if (sc->sc_plus != NULL) {
- bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
- offset += sc->sc_pluslen;
+ if (b->b_plus != NULL) {
+ bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
+ offset += b->b_pluslen;
- sc->sc_plus = NULL;
+ b->b_plus = NULL;
}
subh = (struct pfsync_subheader *)(m->m_data + offset);
@@ -1629,24 +1685,24 @@
/* we're done, let's put it on the wire */
if (ifp->if_bpf) {
m->m_data += sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+ m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip);
BPF_MTAP(ifp, m);
m->m_data -= sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = b->b_len;
}
if (sc->sc_sync_if == NULL) {
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
m_freem(m);
return;
}
if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
- if (!_IF_QFULL(&sc->sc_ifp->if_snd))
- _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+ if (!_IF_QFULL(&b->b_snd))
+ _IF_ENQUEUE(&b->b_snd, m);
else {
m_freem(m);
if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
@@ -1659,6 +1715,7 @@
pfsync_insert_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
if (st->state_flags & PFSTATE_NOSYNC)
return;
@@ -1672,12 +1729,12 @@
KASSERT(st->sync_state == PFSYNC_S_NONE,
("%s: st->sync_state %u", __func__, st->sync_state));
- PFSYNC_LOCK(sc);
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ PFSYNC_BUCKET_LOCK(b);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
pfsync_q_ins(st, PFSYNC_S_INS, true);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
st->sync_updates = 0;
}
@@ -1687,6 +1744,7 @@
{
struct pfsync_softc *sc = V_pfsyncif;
struct pfsync_deferral *pd;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
if (m->m_flags & (M_BCAST|M_MCAST))
return (0);
@@ -1699,13 +1757,13 @@
return (0);
}
- if (sc->sc_deferred >= 128)
- pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+ if (b->b_deferred >= 128)
+ pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
if (pd == NULL)
return (0);
- sc->sc_deferred++;
+ b->b_deferred++;
m->m_flags |= M_SKIP_FIREWALL;
st->state_flags |= PFSTATE_ACK;
@@ -1716,11 +1774,11 @@
pf_ref_state(st);
pd->pd_m = m;
- TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
- callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+ TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
+ callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
- pfsync_push(sc);
+ pfsync_push(b);
return (1);
}
@@ -1731,11 +1789,12 @@
struct pfsync_softc *sc = pd->pd_sc;
struct mbuf *m = pd->pd_m;
struct pf_state *st = pd->pd_st;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
free(pd, M_PFSYNC);
pf_release_state(st);
@@ -1743,8 +1802,8 @@
if (drop)
m_freem(m);
else {
- _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
- pfsync_push(sc);
+ _IF_ENQUEUE(&b->b_snd, m);
+ pfsync_push(b);
}
}
@@ -1755,13 +1814,14 @@
struct pfsync_softc *sc = pd->pd_sc;
struct mbuf *m = pd->pd_m;
struct pf_state *st = pd->pd_st;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
if (pd->pd_refs == 0)
free(pd, M_PFSYNC);
@@ -1779,40 +1839,52 @@
{
struct pfsync_softc *sc = V_pfsyncif;
struct pfsync_deferral *pd;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK(b);
- TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+ TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
if (pd->pd_st == st) {
if (callout_stop(&pd->pd_tmo) > 0)
pfsync_undefer(pd, drop);
+
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
}
+ PFSYNC_BUCKET_UNLOCK(b);
panic("%s: unable to find deferred state", __func__);
}
+static struct pfsync_bucket*
+pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st)
+{
+ int c = PF_IDHASH(st) % pfsync_buckets;
+ return &sc->sc_buckets[c];
+}
+
static void
pfsync_update_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
bool sync = false, ref = true;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
PF_STATE_LOCK_ASSERT(st);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_ACK)
pfsync_undefer_state(st, 0);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
@@ -1828,7 +1900,7 @@
break;
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
@@ -1842,26 +1914,27 @@
}
if (sync || (time_uptime - st->pfsync_time) < 2)
- pfsync_push(sc);
+ pfsync_push(b);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
struct pfsync_upd_req_item *item;
size_t nlen = sizeof(struct pfsync_upd_req);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
/*
* This code does a bit to prevent multiple update requests for the
* same state being generated. It searches current subheader queue,
* but it doesn't lookup into queue of already packed datagrams.
*/
- TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
+ TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
if (item->ur_msg.id == id &&
item->ur_msg.creatorid == creatorid)
return;
@@ -1873,46 +1946,47 @@
item->ur_msg.id = id;
item->ur_msg.creatorid = creatorid;
- if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+ if (TAILQ_EMPTY(&b->b_upd_req_list))
nlen += sizeof(struct pfsync_subheader);
- if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
- pfsync_sendout(1);
+ if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+ pfsync_sendout(1, 0);
nlen = sizeof(struct pfsync_subheader) +
sizeof(struct pfsync_upd_req);
}
- TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
- sc->sc_len += nlen;
+ TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
+ b->b_len += nlen;
}
-static void
+static bool
pfsync_update_state_req(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
- bool ref = true;
+ bool ref = true, full = false;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
PF_STATE_LOCK_ASSERT(st);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
- return;
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
+ return (full);
}
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(st, PFSYNC_S_UPD, ref);
- pfsync_push(sc);
+ pfsync_push(b);
break;
case PFSYNC_S_INS:
@@ -1925,38 +1999,44 @@
panic("%s: unexpected sync state %d", __func__, st->sync_state);
}
- PFSYNC_UNLOCK(sc);
+ if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state))
+ full = true;
+
+ PFSYNC_BUCKET_UNLOCK(b);
+
+ return (full);
}
static void
pfsync_delete_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
bool ref = true;
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_ACK)
pfsync_undefer_state(st, 1);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
switch (st->sync_state) {
case PFSYNC_S_INS:
/* We never got to tell the world so just forget about it. */
- pfsync_q_del(st, true);
+ pfsync_q_del(st, true, b);
break;
case PFSYNC_S_UPD_C:
case PFSYNC_S_UPD:
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
@@ -1968,13 +2048,12 @@
panic("%s: unexpected sync state %d", __func__, st->sync_state);
}
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
- struct pfsync_softc *sc = V_pfsyncif;
struct {
struct pfsync_subheader subh;
struct pfsync_clr clr;
@@ -1989,9 +2068,7 @@
strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
r.clr.creatorid = creatorid;
- PFSYNC_LOCK(sc);
pfsync_send_plus(&r, sizeof(r));
- PFSYNC_UNLOCK(sc);
}
static void
@@ -1999,48 +2076,48 @@
{
struct pfsync_softc *sc = V_pfsyncif;
size_t nlen = pfsync_qs[q].len;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
KASSERT(st->sync_state == PFSYNC_S_NONE,
("%s: st->sync_state %u", __func__, st->sync_state));
- KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
- sc->sc_len));
+ KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+ b->b_len));
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&b->b_qs[q]))
nlen += sizeof(struct pfsync_subheader);
- if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
- pfsync_sendout(1);
+ if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+ pfsync_sendout(1, b->b_id);
nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
}
- sc->sc_len += nlen;
- TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+ b->b_len += nlen;
+ TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
st->sync_state = q;
if (ref)
pf_ref_state(st);
}
static void
-pfsync_q_del(struct pf_state *st, bool unref)
+pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b)
{
- struct pfsync_softc *sc = V_pfsyncif;
int q = st->sync_state;
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
KASSERT(st->sync_state != PFSYNC_S_NONE,
("%s: st->sync_state != PFSYNC_S_NONE", __func__));
- sc->sc_len -= pfsync_qs[q].len;
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ b->b_len -= pfsync_qs[q].len;
+ TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
st->sync_state = PFSYNC_S_NONE;
if (unref)
pf_release_state(st);
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- sc->sc_len -= sizeof(struct pfsync_subheader);
+ if (TAILQ_EMPTY(&b->b_qs[q]))
+ b->b_len -= sizeof(struct pfsync_subheader);
}
static void
@@ -2094,23 +2171,19 @@
}
for (; s; s = LIST_NEXT(s, entry)) {
-
- if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
- sizeof(struct pfsync_state)) {
- /* We've filled a packet. */
- sc->sc_bulk_hashid = i;
- sc->sc_bulk_stateid = s->id;
- sc->sc_bulk_creatorid = s->creatorid;
- PF_HASHROW_UNLOCK(ih);
- callout_reset(&sc->sc_bulk_tmo, 1,
- pfsync_bulk_update, sc);
- goto full;
- }
-
if (s->sync_state == PFSYNC_S_NONE &&
s->timeout < PFTM_MAX &&
s->pfsync_time <= sc->sc_ureq_received) {
- pfsync_update_state_req(s);
+ if (pfsync_update_state_req(s)) {
+ /* We've filled a packet. */
+ sc->sc_bulk_hashid = i;
+ sc->sc_bulk_stateid = s->id;
+ sc->sc_bulk_creatorid = s->creatorid;
+ PF_HASHROW_UNLOCK(ih);
+ callout_reset(&sc->sc_bulk_tmo, 1,
+ pfsync_bulk_update, sc);
+ goto full;
+ }
sent++;
}
}
@@ -2119,7 +2192,6 @@
/* We're done. */
pfsync_bulk_status(PFSYNC_BUS_END);
-
full:
CURVNET_RESTORE();
}
@@ -2144,15 +2216,14 @@
r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
r.bus.status = status;
- PFSYNC_LOCK(sc);
pfsync_send_plus(&r, sizeof(r));
- PFSYNC_UNLOCK(sc);
}
static void
pfsync_bulk_fail(void *arg)
{
struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
CURVNET_SET(sc->sc_ifp->if_vnet);
@@ -2162,9 +2233,9 @@
/* Try again */
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
pfsync_bulk_fail, V_pfsyncif);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
pfsync_request_update(0, 0);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
} else {
/* Pretend like the transfer was ok. */
sc->sc_ureq_sent = 0;
@@ -2186,73 +2257,96 @@
pfsync_send_plus(void *plus, size_t pluslen)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK(b);
- if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
- pfsync_sendout(1);
+ if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
+ pfsync_sendout(1, b->b_id);
- sc->sc_plus = plus;
- sc->sc_len += (sc->sc_pluslen = pluslen);
+ b->b_plus = plus;
+ b->b_len += (b->b_pluslen = pluslen);
- pfsync_sendout(1);
+ pfsync_sendout(1, b->b_id);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_timeout(void *arg)
{
- struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b = arg;
- CURVNET_SET(sc->sc_ifp->if_vnet);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
+ PFSYNC_BUCKET_LOCK(b);
+ pfsync_push(b);
+ PFSYNC_BUCKET_UNLOCK(b);
CURVNET_RESTORE();
}
static void
-pfsync_push(struct pfsync_softc *sc)
+pfsync_push(struct pfsync_bucket *b)
{
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
- sc->sc_flags |= PFSYNCF_PUSH;
+ b->b_flags |= PFSYNCF_BUCKET_PUSH;
swi_sched(V_pfsync_swi_cookie, 0);
}
static void
+pfsync_push_all(struct pfsync_softc *sc)
+{
+ int c;
+ struct pfsync_bucket *b;
+
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+
+ PFSYNC_BUCKET_LOCK(b);
+ pfsync_push(b);
+ PFSYNC_BUCKET_UNLOCK(b);
+ }
+}
+
+static void
pfsyncintr(void *arg)
{
struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b;
struct mbuf *m, *n;
+ int c;
CURVNET_SET(sc->sc_ifp->if_vnet);
- PFSYNC_LOCK(sc);
- if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
- pfsync_sendout(0);
- sc->sc_flags &= ~PFSYNCF_PUSH;
- }
- _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
- PFSYNC_UNLOCK(sc);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
- for (; m != NULL; m = n) {
+ PFSYNC_BUCKET_LOCK(b);
+ if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
+ pfsync_sendout(0, b->b_id);
+ b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
+ }
+ _IF_DEQUEUE_ALL(&b->b_snd, m);
+ PFSYNC_BUCKET_UNLOCK(b);
- n = m->m_nextpkt;
- m->m_nextpkt = NULL;
+ for (; m != NULL; m = n) {
- /*
- * We distinguish between a deferral packet and our
- * own pfsync packet based on M_SKIP_FIREWALL
- * flag. This is XXX.
- */
- if (m->m_flags & M_SKIP_FIREWALL)
- ip_output(m, NULL, NULL, 0, NULL, NULL);
- else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
- NULL) == 0)
- V_pfsyncstats.pfsyncs_opackets++;
- else
- V_pfsyncstats.pfsyncs_oerrors++;
+ n = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ /*
+ * We distinguish between a deferral packet and our
+ * own pfsync packet based on M_SKIP_FIREWALL
+ * flag. This is XXX.
+ */
+ if (m->m_flags & M_SKIP_FIREWALL)
+ ip_output(m, NULL, NULL, 0, NULL, NULL);
+ else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
+ NULL) == 0)
+ V_pfsyncstats.pfsyncs_opackets++;
+ else
+ V_pfsyncstats.pfsyncs_oerrors++;
+ }
}
CURVNET_RESTORE();
}

File Metadata

Mime Type
text/plain
Expires
Tue, Nov 19, 11:32 AM (21 h, 49 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14715702
Default Alt Text
D18373.diff (31 KB)

Event Timeline