D18373: pfsync: Performance improvement
D18373.diff (31 KB)
Index: head/share/man/man4/pfsync.4
===================================================================
--- head/share/man/man4/pfsync.4
+++ head/share/man/man4/pfsync.4
@@ -26,7 +26,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 18, 2017
+.Dd December 6, 2018
.Dt PFSYNC 4
.Os
.Sh NAME
@@ -130,6 +130,13 @@
.Xr carp 4
for more information.
Default value is 240.
+.It Va net.pfsync.pfsync_buckets
+The number of
+.Nm
+buckets.
+This affects the tradeoff between performance and memory use: more buckets reduce lock contention but consume more memory.
+Defaults to twice the number of CPUs.
+Change only if benchmarks show this helps on your workload.
.El
.Sh EXAMPLES
.Nm
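
The net.pfsync.pfsync_buckets tunable documented above is declared with CTLFLAG_RDTUN in the driver change below, so it can only be set from the boot-time kernel environment, not adjusted at runtime. A minimal usage sketch follows; the value 16 is purely illustrative and not a recommendation from this patch:

    # /boot/loader.conf
    # Illustrative value only; left unset, the driver uses mp_ncpus * 2.
    net.pfsync.pfsync_buckets="16"
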
Index: head/sys/netpfil/pf/if_pfsync.c
===================================================================
--- head/sys/netpfil/pf/if_pfsync.c
+++ head/sys/netpfil/pf/if_pfsync.c
@@ -77,6 +77,7 @@
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/protosw.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -106,6 +107,8 @@
sizeof(struct pfsync_header) + \
sizeof(struct pfsync_subheader) )
+struct pfsync_bucket;
+
struct pfsync_pkt {
struct ip *ip;
struct in_addr src;
@@ -164,7 +167,7 @@
};
static void pfsync_q_ins(struct pf_state *, int, bool);
-static void pfsync_q_del(struct pf_state *, bool);
+static void pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *);
static void pfsync_update_state(struct pf_state *);
@@ -183,6 +186,28 @@
struct mbuf *pd_m;
};
+struct pfsync_softc;
+
+struct pfsync_bucket
+{
+ int b_id;
+ struct pfsync_softc *b_sc;
+ struct mtx b_mtx;
+ struct callout b_tmo;
+ int b_flags;
+#define PFSYNCF_BUCKET_PUSH 0x00000001
+
+ size_t b_len;
+ TAILQ_HEAD(, pf_state) b_qs[PFSYNC_S_COUNT];
+ TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list;
+ TAILQ_HEAD(, pfsync_deferral) b_deferrals;
+ u_int b_deferred;
+ void *b_plus;
+ size_t b_pluslen;
+
+ struct ifaltq b_snd;
+};
+
struct pfsync_softc {
/* Configuration */
struct ifnet *sc_ifp;
@@ -192,20 +217,12 @@
uint32_t sc_flags;
#define PFSYNCF_OK 0x00000001
#define PFSYNCF_DEFER 0x00000002
-#define PFSYNCF_PUSH 0x00000004
uint8_t sc_maxupdates;
struct ip sc_template;
- struct callout sc_tmo;
struct mtx sc_mtx;
/* Queued data */
- size_t sc_len;
- TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT];
- TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list;
- TAILQ_HEAD(, pfsync_deferral) sc_deferrals;
- u_int sc_deferred;
- void *sc_plus;
- size_t sc_pluslen;
+ struct pfsync_bucket *sc_buckets;
/* Bulk update info */
struct mtx sc_bulk_mtx;
@@ -223,6 +240,10 @@
#define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx)
#define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED)
+#define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED)
+
#define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx)
#define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx)
#define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
@@ -239,7 +260,8 @@
#define V_pfsync_carp_adj VNET(pfsync_carp_adj)
static void pfsync_timeout(void *);
-static void pfsync_push(struct pfsync_softc *);
+static void pfsync_push(struct pfsync_bucket *);
+static void pfsync_push_all(struct pfsync_softc *);
static void pfsyncintr(void *);
static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
void *);
@@ -249,12 +271,16 @@
static int pfsync_init(void);
static void pfsync_uninit(void);
+static unsigned long pfsync_buckets;
+
SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(pfsyncstats), pfsyncstats,
"PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
&VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
+SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
+ &pfsync_buckets, 0, "Number of pfsync hash buckets");
static int pfsync_clone_create(struct if_clone *, int, caddr_t);
static void pfsync_clone_destroy(struct ifnet *);
@@ -270,10 +296,10 @@
static void pfsync_defer_tmo(void *);
static void pfsync_request_update(u_int32_t, u_int64_t);
-static void pfsync_update_state_req(struct pf_state *);
+static bool pfsync_update_state_req(struct pf_state *);
static void pfsync_drop(struct pfsync_softc *);
-static void pfsync_sendout(int);
+static void pfsync_sendout(int, int);
static void pfsync_send_plus(void *, size_t);
static void pfsync_bulk_start(void);
@@ -285,7 +311,10 @@
#ifdef IPSEC
static void pfsync_update_net_tdb(struct pfsync_tdb *);
#endif
+static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *,
+ struct pf_state *);
+
#define PFSYNC_MAX_BULKTRIES 12
VNET_DEFINE(struct if_clone *, pfsync_cloner);
@@ -296,21 +325,16 @@
{
struct pfsync_softc *sc;
struct ifnet *ifp;
- int q;
+ struct pfsync_bucket *b;
+ int c, q;
if (unit != 0)
return (EINVAL);
- sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
- sc->sc_flags |= PFSYNCF_OK;
+	if (!pfsync_buckets)
+ pfsync_buckets = mp_ncpus * 2;
- for (q = 0; q < PFSYNC_S_COUNT; q++)
- TAILQ_INIT(&sc->sc_qs[q]);
-
- TAILQ_INIT(&sc->sc_upd_req_list);
- TAILQ_INIT(&sc->sc_deferrals);
-
- sc->sc_len = PFSYNC_MINPKT;
+ sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
sc->sc_maxupdates = 128;
ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
@@ -323,12 +347,10 @@
ifp->if_ioctl = pfsyncioctl;
ifp->if_output = pfsyncoutput;
ifp->if_type = IFT_PFSYNC;
- ifp->if_snd.ifq_maxlen = ifqmaxlen;
ifp->if_hdrlen = sizeof(struct pfsync_header);
ifp->if_mtu = ETHERMTU;
mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
- callout_init(&sc->sc_tmo, 1);
callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
@@ -336,6 +358,27 @@
bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
+ sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
+ M_PFSYNC, M_ZERO | M_WAITOK);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF);
+
+ b->b_id = c;
+ b->b_sc = sc;
+ b->b_len = PFSYNC_MINPKT;
+
+ for (q = 0; q < PFSYNC_S_COUNT; q++)
+ TAILQ_INIT(&b->b_qs[q]);
+
+ TAILQ_INIT(&b->b_upd_req_list);
+ TAILQ_INIT(&b->b_deferrals);
+
+ callout_init(&b->b_tmo, 1);
+
+ b->b_snd.ifq_maxlen = ifqmaxlen;
+ }
+
V_pfsyncif = sc;
return (0);
@@ -345,29 +388,36 @@
pfsync_clone_destroy(struct ifnet *ifp)
{
struct pfsync_softc *sc = ifp->if_softc;
+ struct pfsync_bucket *b;
+ int c;
- /*
- * At this stage, everything should have already been
- * cleared by pfsync_uninit(), and we have only to
- * drain callouts.
- */
- while (sc->sc_deferred > 0) {
- struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ /*
+ * At this stage, everything should have already been
+ * cleared by pfsync_uninit(), and we have only to
+ * drain callouts.
+ */
+ while (b->b_deferred > 0) {
+ struct pfsync_deferral *pd =
+ TAILQ_FIRST(&b->b_deferrals);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
- if (callout_stop(&pd->pd_tmo) > 0) {
- pf_release_state(pd->pd_st);
- m_freem(pd->pd_m);
- free(pd, M_PFSYNC);
- } else {
- pd->pd_refs++;
- callout_drain(&pd->pd_tmo);
- free(pd, M_PFSYNC);
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
+ if (callout_stop(&pd->pd_tmo) > 0) {
+ pf_release_state(pd->pd_st);
+ m_freem(pd->pd_m);
+ free(pd, M_PFSYNC);
+ } else {
+ pd->pd_refs++;
+ callout_drain(&pd->pd_tmo);
+ free(pd, M_PFSYNC);
+ }
}
+
+ callout_drain(&b->b_tmo);
}
- callout_drain(&sc->sc_tmo);
callout_drain(&sc->sc_bulkfail_tmo);
callout_drain(&sc->sc_bulk_tmo);
@@ -383,6 +433,8 @@
pfsync_multicast_cleanup(sc);
mtx_destroy(&sc->sc_mtx);
mtx_destroy(&sc->sc_bulk_mtx);
+
+ free(sc->sc_buckets, M_PFSYNC);
free(sc, M_PFSYNC);
V_pfsyncif = NULL;
@@ -546,7 +598,7 @@
st->state_flags &= ~PFSTATE_NOSYNC;
if (st->state_flags & PFSTATE_ACK) {
pfsync_q_ins(st, PFSYNC_S_IACK, true);
- pfsync_push(sc);
+ pfsync_push_all(sc);
}
}
st->state_flags &= ~PFSTATE_ACK;
@@ -785,9 +837,7 @@
continue;
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(V_pfsyncif);
pfsync_undefer_state(st, 0);
- PFSYNC_UNLOCK(V_pfsyncif);
}
PF_STATE_UNLOCK(st);
}
@@ -876,9 +926,7 @@
}
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(sc);
pfsync_undefer_state(st, 1);
- PFSYNC_UNLOCK(sc);
}
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -912,9 +960,7 @@
pfsync_update_state(st);
PF_STATE_UNLOCK(st);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ pfsync_push_all(sc);
continue;
}
PF_STATE_UNLOCK(st);
@@ -960,16 +1006,14 @@
st = pf_find_state_byid(up->id, up->creatorid);
if (st == NULL) {
/* We don't have this state. Ask for it. */
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
pfsync_request_update(up->creatorid, up->id);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
continue;
}
if (st->state_flags & PFSTATE_ACK) {
- PFSYNC_LOCK(sc);
pfsync_undefer_state(st, 1);
- PFSYNC_UNLOCK(sc);
}
if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -1003,9 +1047,7 @@
pfsync_update_state(st);
PF_STATE_UNLOCK(st);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ pfsync_push_all(sc);
continue;
}
PF_STATE_UNLOCK(st);
@@ -1283,6 +1325,7 @@
struct ifreq *ifr = (struct ifreq *)data;
struct pfsyncreq pfsyncr;
int error;
+ int c;
switch (cmd) {
case SIOCSIFFLAGS:
@@ -1303,10 +1346,12 @@
ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
return (EINVAL);
if (ifr->ifr_mtu < ifp->if_mtu) {
- PFSYNC_LOCK(sc);
- if (sc->sc_len > PFSYNC_MINPKT)
- pfsync_sendout(1);
- PFSYNC_UNLOCK(sc);
+ for (c = 0; c < pfsync_buckets; c++) {
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+ if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
+ pfsync_sendout(1, c);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+ }
}
ifp->if_mtu = ifr->ifr_mtu;
break;
@@ -1379,12 +1424,16 @@
break;
}
- if (sc->sc_len > PFSYNC_MINPKT &&
- (sifp->if_mtu < sc->sc_ifp->if_mtu ||
- (sc->sc_sync_if != NULL &&
- sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
- sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
- pfsync_sendout(1);
+ for (c = 0; c < pfsync_buckets; c++) {
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+ if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
+ (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+ (sc->sc_sync_if != NULL &&
+ sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
+ sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
+ pfsync_sendout(1, c);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+ }
if (imo->imo_membership)
pfsync_multicast_cleanup(sc);
@@ -1421,8 +1470,10 @@
sc->sc_flags &= ~PFSYNCF_OK;
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("pfsync: requesting bulk update\n");
- pfsync_request_update(0, 0);
PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
+ pfsync_request_update(0, 0);
+ PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
PFSYNC_BLOCK(sc);
sc->sc_ureq_sent = time_uptime;
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
@@ -1483,33 +1534,37 @@
{
struct pf_state *st, *next;
struct pfsync_upd_req_item *ur;
- int q;
+ struct pfsync_bucket *b;
+ int c, q;
- for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- continue;
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+ for (q = 0; q < PFSYNC_S_COUNT; q++) {
+ if (TAILQ_EMPTY(&b->b_qs[q]))
+ continue;
- TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
- KASSERT(st->sync_state == q,
- ("%s: st->sync_state == q",
- __func__));
- st->sync_state = PFSYNC_S_NONE;
- pf_release_state(st);
+ TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
+ KASSERT(st->sync_state == q,
+ ("%s: st->sync_state == q",
+ __func__));
+ st->sync_state = PFSYNC_S_NONE;
+ pf_release_state(st);
+ }
+ TAILQ_INIT(&b->b_qs[q]);
}
- TAILQ_INIT(&sc->sc_qs[q]);
- }
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
- free(ur, M_PFSYNC);
- }
+ while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
+ free(ur, M_PFSYNC);
+ }
- sc->sc_plus = NULL;
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
+ b->b_plus = NULL;
+ }
}
static void
-pfsync_sendout(int schedswi)
+pfsync_sendout(int schedswi, int c)
{
struct pfsync_softc *sc = V_pfsyncif;
struct ifnet *ifp = sc->sc_ifp;
@@ -1519,27 +1574,28 @@
struct pfsync_subheader *subh;
struct pf_state *st, *st_next;
struct pfsync_upd_req_item *ur;
+ struct pfsync_bucket *b = &sc->sc_buckets[c];
int offset;
int q, count = 0;
KASSERT(sc != NULL, ("%s: null sc", __func__));
- KASSERT(sc->sc_len > PFSYNC_MINPKT,
- ("%s: sc_len %zu", __func__, sc->sc_len));
- PFSYNC_LOCK_ASSERT(sc);
+ KASSERT(b->b_len > PFSYNC_MINPKT,
+ ("%s: sc_len %zu", __func__, b->b_len));
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
pfsync_drop(sc);
return;
}
- m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
+ m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL) {
if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
V_pfsyncstats.pfsyncs_onomem++;
return;
}
m->m_data += max_linkhdr;
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = b->b_len;
/* build the ip header */
ip = (struct ip *)m->m_data;
@@ -1555,19 +1611,19 @@
offset += sizeof(*ph);
ph->version = PFSYNC_VERSION;
- ph->len = htons(sc->sc_len - sizeof(*ip));
+ ph->len = htons(b->b_len - sizeof(*ip));
bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
/* walk the queues */
for (q = 0; q < PFSYNC_S_COUNT; q++) {
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&b->b_qs[q]))
continue;
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) {
+ TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
KASSERT(st->sync_state == q,
("%s: st->sync_state == q",
__func__));
@@ -1581,7 +1637,7 @@
pf_release_state(st);
count++;
}
- TAILQ_INIT(&sc->sc_qs[q]);
+ TAILQ_INIT(&b->b_qs[q]);
bzero(subh, sizeof(*subh));
subh->action = pfsync_qs[q].action;
@@ -1589,13 +1645,13 @@
V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
}
- if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+ if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
subh = (struct pfsync_subheader *)(m->m_data + offset);
offset += sizeof(*subh);
count = 0;
- while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
- TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+ while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+ TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
bcopy(&ur->ur_msg, m->m_data + offset,
sizeof(ur->ur_msg));
@@ -1611,11 +1667,11 @@
}
/* has someone built a custom region for us to add? */
- if (sc->sc_plus != NULL) {
- bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
- offset += sc->sc_pluslen;
+ if (b->b_plus != NULL) {
+ bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
+ offset += b->b_pluslen;
- sc->sc_plus = NULL;
+ b->b_plus = NULL;
}
subh = (struct pfsync_subheader *)(m->m_data + offset);
@@ -1629,24 +1685,24 @@
/* we're done, let's put it on the wire */
if (ifp->if_bpf) {
m->m_data += sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+ m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip);
BPF_MTAP(ifp, m);
m->m_data -= sizeof(*ip);
- m->m_len = m->m_pkthdr.len = sc->sc_len;
+ m->m_len = m->m_pkthdr.len = b->b_len;
}
if (sc->sc_sync_if == NULL) {
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
m_freem(m);
return;
}
if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
- sc->sc_len = PFSYNC_MINPKT;
+ b->b_len = PFSYNC_MINPKT;
- if (!_IF_QFULL(&sc->sc_ifp->if_snd))
- _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+ if (!_IF_QFULL(&b->b_snd))
+ _IF_ENQUEUE(&b->b_snd, m);
else {
m_freem(m);
if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
@@ -1659,6 +1715,7 @@
pfsync_insert_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
if (st->state_flags & PFSTATE_NOSYNC)
return;
@@ -1672,12 +1729,12 @@
KASSERT(st->sync_state == PFSYNC_S_NONE,
("%s: st->sync_state %u", __func__, st->sync_state));
- PFSYNC_LOCK(sc);
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ PFSYNC_BUCKET_LOCK(b);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
pfsync_q_ins(st, PFSYNC_S_INS, true);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
st->sync_updates = 0;
}
@@ -1687,6 +1744,7 @@
{
struct pfsync_softc *sc = V_pfsyncif;
struct pfsync_deferral *pd;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
if (m->m_flags & (M_BCAST|M_MCAST))
return (0);
@@ -1699,13 +1757,13 @@
return (0);
}
- if (sc->sc_deferred >= 128)
- pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+ if (b->b_deferred >= 128)
+ pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
if (pd == NULL)
return (0);
- sc->sc_deferred++;
+ b->b_deferred++;
m->m_flags |= M_SKIP_FIREWALL;
st->state_flags |= PFSTATE_ACK;
@@ -1716,11 +1774,11 @@
pf_ref_state(st);
pd->pd_m = m;
- TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
- callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+ TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
+ callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
- pfsync_push(sc);
+ pfsync_push(b);
return (1);
}
@@ -1731,11 +1789,12 @@
struct pfsync_softc *sc = pd->pd_sc;
struct mbuf *m = pd->pd_m;
struct pf_state *st = pd->pd_st;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
free(pd, M_PFSYNC);
pf_release_state(st);
@@ -1743,8 +1802,8 @@
if (drop)
m_freem(m);
else {
- _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
- pfsync_push(sc);
+ _IF_ENQUEUE(&b->b_snd, m);
+ pfsync_push(b);
}
}
@@ -1755,13 +1814,14 @@
struct pfsync_softc *sc = pd->pd_sc;
struct mbuf *m = pd->pd_m;
struct pf_state *st = pd->pd_st;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
- TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
- sc->sc_deferred--;
+ TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+ b->b_deferred--;
pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
if (pd->pd_refs == 0)
free(pd, M_PFSYNC);
@@ -1779,40 +1839,52 @@
{
struct pfsync_softc *sc = V_pfsyncif;
struct pfsync_deferral *pd;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK(b);
- TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+ TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
if (pd->pd_st == st) {
if (callout_stop(&pd->pd_tmo) > 0)
pfsync_undefer(pd, drop);
+
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
}
+ PFSYNC_BUCKET_UNLOCK(b);
panic("%s: unable to find deferred state", __func__);
}
+static struct pfsync_bucket *
+pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st)
+{
+	int c = PF_IDHASH(st) % pfsync_buckets;
+	return (&sc->sc_buckets[c]);
+}
+
static void
pfsync_update_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
bool sync = false, ref = true;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
PF_STATE_LOCK_ASSERT(st);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_ACK)
pfsync_undefer_state(st, 0);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
@@ -1828,7 +1900,7 @@
break;
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
@@ -1842,26 +1914,27 @@
}
if (sync || (time_uptime - st->pfsync_time) < 2)
- pfsync_push(sc);
+ pfsync_push(b);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
struct pfsync_upd_req_item *item;
size_t nlen = sizeof(struct pfsync_upd_req);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
/*
* This code does a bit to prevent multiple update requests for the
* same state being generated. It searches current subheader queue,
* but it doesn't lookup into queue of already packed datagrams.
*/
- TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
+ TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
if (item->ur_msg.id == id &&
item->ur_msg.creatorid == creatorid)
return;
@@ -1873,46 +1946,47 @@
item->ur_msg.id = id;
item->ur_msg.creatorid = creatorid;
- if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+ if (TAILQ_EMPTY(&b->b_upd_req_list))
nlen += sizeof(struct pfsync_subheader);
- if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
- pfsync_sendout(1);
+ if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+ pfsync_sendout(1, 0);
nlen = sizeof(struct pfsync_subheader) +
sizeof(struct pfsync_upd_req);
}
- TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
- sc->sc_len += nlen;
+ TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
+ b->b_len += nlen;
}
-static void
+static bool
pfsync_update_state_req(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
- bool ref = true;
+ bool ref = true, full = false;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
PF_STATE_LOCK_ASSERT(st);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
- return;
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
+ return (full);
}
switch (st->sync_state) {
case PFSYNC_S_UPD_C:
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
case PFSYNC_S_NONE:
pfsync_q_ins(st, PFSYNC_S_UPD, ref);
- pfsync_push(sc);
+ pfsync_push(b);
break;
case PFSYNC_S_INS:
@@ -1925,38 +1999,44 @@
panic("%s: unexpected sync state %d", __func__, st->sync_state);
}
- PFSYNC_UNLOCK(sc);
+ if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state))
+ full = true;
+
+ PFSYNC_BUCKET_UNLOCK(b);
+
+ return (full);
}
static void
pfsync_delete_state(struct pf_state *st)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
bool ref = true;
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
if (st->state_flags & PFSTATE_ACK)
pfsync_undefer_state(st, 1);
if (st->state_flags & PFSTATE_NOSYNC) {
if (st->sync_state != PFSYNC_S_NONE)
- pfsync_q_del(st, true);
- PFSYNC_UNLOCK(sc);
+ pfsync_q_del(st, true, b);
+ PFSYNC_BUCKET_UNLOCK(b);
return;
}
- if (sc->sc_len == PFSYNC_MINPKT)
- callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+ if (b->b_len == PFSYNC_MINPKT)
+ callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
switch (st->sync_state) {
case PFSYNC_S_INS:
/* We never got to tell the world so just forget about it. */
- pfsync_q_del(st, true);
+ pfsync_q_del(st, true, b);
break;
case PFSYNC_S_UPD_C:
case PFSYNC_S_UPD:
case PFSYNC_S_IACK:
- pfsync_q_del(st, false);
+ pfsync_q_del(st, false, b);
ref = false;
/* FALLTHROUGH */
@@ -1968,13 +2048,12 @@
panic("%s: unexpected sync state %d", __func__, st->sync_state);
}
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
- struct pfsync_softc *sc = V_pfsyncif;
struct {
struct pfsync_subheader subh;
struct pfsync_clr clr;
@@ -1989,9 +2068,7 @@
strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
r.clr.creatorid = creatorid;
- PFSYNC_LOCK(sc);
pfsync_send_plus(&r, sizeof(r));
- PFSYNC_UNLOCK(sc);
}
static void
@@ -1999,48 +2076,48 @@
{
struct pfsync_softc *sc = V_pfsyncif;
size_t nlen = pfsync_qs[q].len;
+ struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
KASSERT(st->sync_state == PFSYNC_S_NONE,
("%s: st->sync_state %u", __func__, st->sync_state));
- KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
- sc->sc_len));
+ KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+ b->b_len));
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
+ if (TAILQ_EMPTY(&b->b_qs[q]))
nlen += sizeof(struct pfsync_subheader);
- if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
- pfsync_sendout(1);
+ if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+ pfsync_sendout(1, b->b_id);
nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
}
- sc->sc_len += nlen;
- TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+ b->b_len += nlen;
+ TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
st->sync_state = q;
if (ref)
pf_ref_state(st);
}
static void
-pfsync_q_del(struct pf_state *st, bool unref)
+pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b)
{
- struct pfsync_softc *sc = V_pfsyncif;
int q = st->sync_state;
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
KASSERT(st->sync_state != PFSYNC_S_NONE,
("%s: st->sync_state != PFSYNC_S_NONE", __func__));
- sc->sc_len -= pfsync_qs[q].len;
- TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
+ b->b_len -= pfsync_qs[q].len;
+ TAILQ_REMOVE(&b->b_qs[q], st, sync_list);
st->sync_state = PFSYNC_S_NONE;
if (unref)
pf_release_state(st);
- if (TAILQ_EMPTY(&sc->sc_qs[q]))
- sc->sc_len -= sizeof(struct pfsync_subheader);
+ if (TAILQ_EMPTY(&b->b_qs[q]))
+ b->b_len -= sizeof(struct pfsync_subheader);
}
static void
@@ -2094,23 +2171,19 @@
}
for (; s; s = LIST_NEXT(s, entry)) {
-
- if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
- sizeof(struct pfsync_state)) {
- /* We've filled a packet. */
- sc->sc_bulk_hashid = i;
- sc->sc_bulk_stateid = s->id;
- sc->sc_bulk_creatorid = s->creatorid;
- PF_HASHROW_UNLOCK(ih);
- callout_reset(&sc->sc_bulk_tmo, 1,
- pfsync_bulk_update, sc);
- goto full;
- }
-
if (s->sync_state == PFSYNC_S_NONE &&
s->timeout < PFTM_MAX &&
s->pfsync_time <= sc->sc_ureq_received) {
- pfsync_update_state_req(s);
+ if (pfsync_update_state_req(s)) {
+ /* We've filled a packet. */
+ sc->sc_bulk_hashid = i;
+ sc->sc_bulk_stateid = s->id;
+ sc->sc_bulk_creatorid = s->creatorid;
+ PF_HASHROW_UNLOCK(ih);
+ callout_reset(&sc->sc_bulk_tmo, 1,
+ pfsync_bulk_update, sc);
+ goto full;
+ }
sent++;
}
}
@@ -2119,7 +2192,6 @@
/* We're done. */
pfsync_bulk_status(PFSYNC_BUS_END);
-
full:
CURVNET_RESTORE();
}
@@ -2144,15 +2216,14 @@
r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
r.bus.status = status;
- PFSYNC_LOCK(sc);
pfsync_send_plus(&r, sizeof(r));
- PFSYNC_UNLOCK(sc);
}
static void
pfsync_bulk_fail(void *arg)
{
struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
CURVNET_SET(sc->sc_ifp->if_vnet);
@@ -2162,9 +2233,9 @@
/* Try again */
callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
pfsync_bulk_fail, V_pfsyncif);
- PFSYNC_LOCK(sc);
+ PFSYNC_BUCKET_LOCK(b);
pfsync_request_update(0, 0);
- PFSYNC_UNLOCK(sc);
+ PFSYNC_BUCKET_UNLOCK(b);
} else {
/* Pretend like the transfer was ok. */
sc->sc_ureq_sent = 0;
@@ -2186,73 +2257,96 @@
pfsync_send_plus(void *plus, size_t pluslen)
{
struct pfsync_softc *sc = V_pfsyncif;
+ struct pfsync_bucket *b = &sc->sc_buckets[0];
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK(b);
- if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
- pfsync_sendout(1);
+ if (b->b_len + pluslen > sc->sc_ifp->if_mtu)
+ pfsync_sendout(1, b->b_id);
- sc->sc_plus = plus;
- sc->sc_len += (sc->sc_pluslen = pluslen);
+ b->b_plus = plus;
+ b->b_len += (b->b_pluslen = pluslen);
- pfsync_sendout(1);
+ pfsync_sendout(1, b->b_id);
+ PFSYNC_BUCKET_UNLOCK(b);
}
static void
pfsync_timeout(void *arg)
{
- struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b = arg;
- CURVNET_SET(sc->sc_ifp->if_vnet);
- PFSYNC_LOCK(sc);
- pfsync_push(sc);
- PFSYNC_UNLOCK(sc);
+ CURVNET_SET(b->b_sc->sc_ifp->if_vnet);
+ PFSYNC_BUCKET_LOCK(b);
+ pfsync_push(b);
+ PFSYNC_BUCKET_UNLOCK(b);
CURVNET_RESTORE();
}
static void
-pfsync_push(struct pfsync_softc *sc)
+pfsync_push(struct pfsync_bucket *b)
{
- PFSYNC_LOCK_ASSERT(sc);
+ PFSYNC_BUCKET_LOCK_ASSERT(b);
- sc->sc_flags |= PFSYNCF_PUSH;
+ b->b_flags |= PFSYNCF_BUCKET_PUSH;
swi_sched(V_pfsync_swi_cookie, 0);
}
static void
+pfsync_push_all(struct pfsync_softc *sc)
+{
+ int c;
+ struct pfsync_bucket *b;
+
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
+
+ PFSYNC_BUCKET_LOCK(b);
+ pfsync_push(b);
+ PFSYNC_BUCKET_UNLOCK(b);
+ }
+}
+
+static void
pfsyncintr(void *arg)
{
struct pfsync_softc *sc = arg;
+ struct pfsync_bucket *b;
struct mbuf *m, *n;
+ int c;
CURVNET_SET(sc->sc_ifp->if_vnet);
- PFSYNC_LOCK(sc);
- if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
- pfsync_sendout(0);
- sc->sc_flags &= ~PFSYNCF_PUSH;
- }
- _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
- PFSYNC_UNLOCK(sc);
+ for (c = 0; c < pfsync_buckets; c++) {
+ b = &sc->sc_buckets[c];
- for (; m != NULL; m = n) {
+ PFSYNC_BUCKET_LOCK(b);
+ if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) {
+ pfsync_sendout(0, b->b_id);
+ b->b_flags &= ~PFSYNCF_BUCKET_PUSH;
+ }
+ _IF_DEQUEUE_ALL(&b->b_snd, m);
+ PFSYNC_BUCKET_UNLOCK(b);
- n = m->m_nextpkt;
- m->m_nextpkt = NULL;
+ for (; m != NULL; m = n) {
- /*
- * We distinguish between a deferral packet and our
- * own pfsync packet based on M_SKIP_FIREWALL
- * flag. This is XXX.
- */
- if (m->m_flags & M_SKIP_FIREWALL)
- ip_output(m, NULL, NULL, 0, NULL, NULL);
- else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
- NULL) == 0)
- V_pfsyncstats.pfsyncs_opackets++;
- else
- V_pfsyncstats.pfsyncs_oerrors++;
+ n = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+
+ /*
+ * We distinguish between a deferral packet and our
+ * own pfsync packet based on M_SKIP_FIREWALL
+ * flag. This is XXX.
+ */
+ if (m->m_flags & M_SKIP_FIREWALL)
+ ip_output(m, NULL, NULL, 0, NULL, NULL);
+ else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
+ NULL) == 0)
+ V_pfsyncstats.pfsyncs_opackets++;
+ else
+ V_pfsyncstats.pfsyncs_oerrors++;
+ }
}
CURVNET_RESTORE();
}
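
The heart of the change is visible in pfsync_get_bucket(): each state hashes via PF_IDHASH() modulo pfsync_buckets to one bucket that owns its own mutex, queues, send queue, and callout, so concurrent state updates contend per bucket instead of on the single softc lock. Below is a minimal userspace sketch of that pattern, not the kernel code itself; all names in it (bucket, bucket_for, queue_update) are illustrative assumptions, not identifiers from if_pfsync.c.

    /*
     * Sketch of the bucketing pattern in this patch: hash an object
     * id into one of N buckets, each with its own mutex and
     * queued-bytes counter, so unrelated updates contend per bucket
     * rather than on one global lock.
     */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NBUCKETS 8			/* cf. default of mp_ncpus * 2 */

    struct bucket {
    	pthread_mutex_t	mtx;		/* cf. b_mtx */
    	size_t		len;		/* cf. b_len */
    };

    static struct bucket buckets[NBUCKETS];

    /* cf. pfsync_get_bucket(): PF_IDHASH(st) % pfsync_buckets */
    static struct bucket *
    bucket_for(uint64_t id)
    {
    	return (&buckets[id % NBUCKETS]);
    }

    /* cf. pfsync_q_ins(): grow the bucket under its own lock only */
    static void
    queue_update(uint64_t id, size_t msglen)
    {
    	struct bucket *b = bucket_for(id);

    	pthread_mutex_lock(&b->mtx);
    	b->len += msglen;
    	pthread_mutex_unlock(&b->mtx);
    }

    int
    main(void)
    {
    	int i;

    	for (i = 0; i < NBUCKETS; i++)
    		pthread_mutex_init(&buckets[i].mtx, NULL);

    	queue_update(0xdeadbeefULL, 64);
    	queue_update(0xfeedfaceULL, 64);

    	for (i = 0; i < NBUCKETS; i++)
    		printf("bucket %d: %zu bytes queued\n", i, buckets[i].len);

    	return (0);
    }

On the kernel side each bucket additionally carries its own callout (b_tmo) and ifaltq send queue (b_snd), which is why the man page frames the knob as a tradeoff between performance and memory.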