Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102719634
D22827.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D22827.diff
View Options
Index: head/sys/vm/uma_core.c
===================================================================
--- head/sys/vm/uma_core.c
+++ head/sys/vm/uma_core.c
@@ -75,6 +75,7 @@
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
+#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <sys/vmmeter.h>
@@ -267,8 +268,9 @@
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int, int);
-static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
+static int zone_alloc_limit(uma_zone_t zone, int count, int flags);
+static void zone_free_limit(uma_zone_t zone, int count);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
@@ -290,6 +292,7 @@
static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS);
+static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS);
#ifdef INVARIANTS
static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg);
@@ -893,7 +896,7 @@
*
* Arguments:
* zone The zone to free to, must be unlocked.
- * bucket The free/alloc bucket with items, cpu queue must be locked.
+ * bucket The free/alloc bucket with items.
*
* Returns:
* Nothing
@@ -904,20 +907,15 @@
{
int i;
- if (bucket == NULL)
+ if (bucket == NULL || bucket->ub_cnt == 0)
return;
if (zone->uz_fini)
for (i = 0; i < bucket->ub_cnt; i++)
zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
- if (zone->uz_max_items > 0) {
- ZONE_LOCK(zone);
- zone->uz_items -= bucket->ub_cnt;
- if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
- wakeup_one(zone);
- ZONE_UNLOCK(zone);
- }
+ if (zone->uz_max_items > 0)
+ zone_free_limit(zone, bucket->ub_cnt);
bucket->ub_cnt = 0;
}
@@ -2096,10 +2094,11 @@
*/
oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO,
"limit", CTLFLAG_RD, NULL, "");
+ SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE,
+ zone, 0, sysctl_handle_uma_zone_items, "QU",
+ "current number of allocated items if limit is set");
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
- "items", CTLFLAG_RD, &zone->uz_items, 0,
- "current number of cached items");
- SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"max_items", CTLFLAG_RD, &zone->uz_max_items, 0,
"Maximum number of cached items");
SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
@@ -2108,6 +2107,12 @@
SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
"sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0,
"Total zone limit sleeps");
+ SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0,
+ "Maximum number of items in the bucket cache");
+ SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO,
+ "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0,
+ "Number of items in the bucket cache");
/*
* Per-domain information.
@@ -2961,15 +2966,15 @@
domain = PCPU_GET(domain);
else
domain = UMA_ANYDOMAIN;
- return (zone_alloc_item_locked(zone, udata, domain, flags));
+ return (zone_alloc_item(zone, udata, domain, flags));
}
/*
* Replenish an alloc bucket and possibly restore an old one. Called in
* a critical section. Returns in a critical section.
*
- * A false return value indicates failure and returns with the zone lock
- * held. A true return value indicates success and the caller should retry.
+ * A false return value indicates an allocation failure.
+ * A true return value indicates success and the caller should retry.
*/
static __noinline bool
cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags)
@@ -2998,6 +3003,12 @@
if (bucket != NULL)
bucket_free(zone, bucket, udata);
+ /* Short-circuit for zones without buckets and low memory. */
+ if (zone->uz_bucket_size == 0 || bucketdisable) {
+ critical_enter();
+ return (false);
+ }
+
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
* we must go back to the zone. This requires the zone lock, so we
@@ -3014,14 +3025,9 @@
lockfail = 1;
}
+ /* See if we lost the race to fill the cache. */
critical_enter();
- /* Short-circuit for zones without buckets and low memory. */
- if (zone->uz_bucket_size == 0 || bucketdisable)
- return (false);
-
cache = &zone->uz_cpu[curcpu];
-
- /* See if we lost the race to fill the cache. */
if (cache->uc_allocbucket.ucb_bucket != NULL) {
ZONE_UNLOCK(zone);
return (true);
@@ -3054,6 +3060,7 @@
*/
if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max)
zone->uz_bucket_size++;
+ ZONE_UNLOCK(zone);
/*
* Fill a bucket and attempt to use it as the alloc bucket.
@@ -3061,15 +3068,18 @@
bucket = zone_alloc_bucket(zone, udata, domain, flags);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
- critical_enter();
- if (bucket == NULL)
+ if (bucket == NULL) {
+ critical_enter();
return (false);
+ }
/*
* See if we lost the race or were migrated. Cache the
* initialized bucket to make this less likely or claim
* the memory directly.
*/
+ ZONE_LOCK(zone);
+ critical_enter();
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket.ucb_bucket == NULL &&
((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
@@ -3202,10 +3212,6 @@
if (flags & M_NOVM)
break;
- KASSERT(zone->uz_max_items == 0 ||
- zone->uz_items <= zone->uz_max_items,
- ("%s: zone %p overflow", __func__, zone));
-
slab = keg_alloc_slab(keg, zone, domain, flags, aflags);
/*
* If we got a slab here it's safe to mark it partially used
@@ -3316,6 +3322,159 @@
return i;
}
+static int
+zone_alloc_limit_hard(uma_zone_t zone, int count, int flags)
+{
+ uint64_t old, new, total, max;
+
+ /*
+ * The hard case. We're going to sleep because there were existing
+ * sleepers or because we ran out of items. This routine enforces
+ * fairness by keeping fifo order.
+ *
+ * First release our ill gotten gains and make some noise.
+ */
+ for (;;) {
+ zone_free_limit(zone, count);
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ if (flags & M_NOWAIT)
+ return (0);
+
+ /*
+	 * We need to allocate an item or set ourselves as a sleeper
+ * while the sleepq lock is held to avoid wakeup races. This
+ * is essentially a home rolled semaphore.
+ */
+ sleepq_lock(&zone->uz_max_items);
+ old = zone->uz_items;
+ do {
+ MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX);
+ /* Cache the max since we will evaluate twice. */
+ max = zone->uz_max_items;
+ if (UZ_ITEMS_SLEEPERS(old) != 0 ||
+ UZ_ITEMS_COUNT(old) >= max)
+ new = old + UZ_ITEMS_SLEEPER;
+ else
+ new = old + MIN(count, max - old);
+ } while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0);
+
+ /* We may have successfully allocated under the sleepq lock. */
+ if (UZ_ITEMS_SLEEPERS(new) == 0) {
+ sleepq_release(&zone->uz_max_items);
+ return (new - old);
+ }
+
+ /*
+ * This is in a different cacheline from uz_items so that we
+ * don't constantly invalidate the fastpath cacheline when we
+ * adjust item counts. This could be limited to toggling on
+ * transitions.
+ */
+ atomic_add_32(&zone->uz_sleepers, 1);
+ atomic_add_64(&zone->uz_sleeps, 1);
+
+ /*
+ * We have added ourselves as a sleeper. The sleepq lock
+ * protects us from wakeup races. Sleep now and then retry.
+ */
+ sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0);
+ sleepq_wait(&zone->uz_max_items, PVM);
+
+ /*
+ * After wakeup, remove ourselves as a sleeper and try
+ * again. We no longer have the sleepq lock for protection.
+ *
+	 * Subtract ourselves as a sleeper while attempting to add
+ * our count.
+ */
+ atomic_subtract_32(&zone->uz_sleepers, 1);
+ old = atomic_fetchadd_64(&zone->uz_items,
+ -(UZ_ITEMS_SLEEPER - count));
+ /* We're no longer a sleeper. */
+ old -= UZ_ITEMS_SLEEPER;
+
+ /*
+ * If we're still at the limit, restart. Notably do not
+ * block on other sleepers. Cache the max value to protect
+ * against changes via sysctl.
+ */
+ total = UZ_ITEMS_COUNT(old);
+ max = zone->uz_max_items;
+ if (total >= max)
+ continue;
+ /* Truncate if necessary, otherwise wake other sleepers. */
+ if (total + count > max) {
+ zone_free_limit(zone, total + count - max);
+ count = max - total;
+ } else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0)
+ wakeup_one(&zone->uz_max_items);
+
+ return (count);
+ }
+}
+
+/*
+ * Allocate 'count' items from our max_items limit. Returns the number
+ * available. If M_NOWAIT is not specified it will sleep until at least
+ * one item can be allocated.
+ */
+static int
+zone_alloc_limit(uma_zone_t zone, int count, int flags)
+{
+ uint64_t old;
+ uint64_t max;
+
+ max = zone->uz_max_items;
+ MPASS(max > 0);
+
+ /*
+ * We expect normal allocations to succeed with a simple
+ * fetchadd.
+ */
+ old = atomic_fetchadd_64(&zone->uz_items, count);
+ if (__predict_true(old + count <= max))
+ return (count);
+
+ /*
+ * If we had some items and no sleepers just return the
+ * truncated value. We have to release the excess space
+ * though because that may wake sleepers who weren't woken
+ * because we were temporarily over the limit.
+ */
+ if (old < max) {
+ zone_free_limit(zone, (old + count) - max);
+ return (max - old);
+ }
+ return (zone_alloc_limit_hard(zone, count, flags));
+}
+
+/*
+ * Free a number of items back to the limit.
+ */
+static void
+zone_free_limit(uma_zone_t zone, int count)
+{
+ uint64_t old;
+
+ MPASS(count > 0);
+
+ /*
+ * In the common case we either have no sleepers or
+ * are still over the limit and can just return.
+ */
+ old = atomic_fetchadd_64(&zone->uz_items, -count);
+ if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 ||
+ UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items))
+ return;
+
+ /*
+ * Moderate the rate of wakeups. Sleepers will continue
+ * to generate wakeups if necessary.
+ */
+ wakeup_one(&zone->uz_max_items);
+}
+
static uma_bucket_t
zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
{
@@ -3328,15 +3487,13 @@
if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
domain = UMA_ANYDOMAIN;
- if (zone->uz_max_items > 0) {
- if (zone->uz_items >= zone->uz_max_items)
- return (false);
- maxbucket = MIN(zone->uz_bucket_size,
- zone->uz_max_items - zone->uz_items);
- zone->uz_items += maxbucket;
- } else
+ if (zone->uz_max_items > 0)
+ maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size,
+ M_NOWAIT);
+ else
maxbucket = zone->uz_bucket_size;
- ZONE_UNLOCK(zone);
+ if (maxbucket == 0)
+ return (false);
/* Don't wait for buckets, preserve caller's NOVM setting. */
bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
@@ -3380,15 +3537,8 @@
bucket = NULL;
}
out:
- ZONE_LOCK(zone);
- if (zone->uz_max_items > 0 && cnt < maxbucket) {
- MPASS(zone->uz_items >= maxbucket - cnt);
- zone->uz_items -= maxbucket - cnt;
- if (zone->uz_sleepers > 0 &&
- (cnt == 0 ? zone->uz_items + 1 : zone->uz_items) <
- zone->uz_max_items)
- wakeup_one(zone);
- }
+ if (zone->uz_max_items > 0 && cnt < maxbucket)
+ zone_free_limit(zone, maxbucket - cnt);
return (bucket);
}
@@ -3410,43 +3560,11 @@
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
{
-
- ZONE_LOCK(zone);
- return (zone_alloc_item_locked(zone, udata, domain, flags));
-}
-
-/*
- * Returns with zone unlocked.
- */
-static void *
-zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
-{
void *item;
- ZONE_LOCK_ASSERT(zone);
+ if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0)
+ return (NULL);
- if (zone->uz_max_items > 0) {
- if (zone->uz_items >= zone->uz_max_items) {
- zone_log_warning(zone);
- zone_maxaction(zone);
- if (flags & M_NOWAIT) {
- ZONE_UNLOCK(zone);
- return (NULL);
- }
- zone->uz_sleeps++;
- zone->uz_sleepers++;
- while (zone->uz_items >= zone->uz_max_items)
- mtx_sleep(zone, zone->uz_lockptr, PVM,
- "zonelimit", 0);
- zone->uz_sleepers--;
- if (zone->uz_sleepers > 0 &&
- zone->uz_items + 1 < zone->uz_max_items)
- wakeup_one(zone);
- }
- zone->uz_items++;
- }
- ZONE_UNLOCK(zone);
-
/* Avoid allocs targeting empty domains. */
if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain))
domain = UMA_ANYDOMAIN;
@@ -3479,14 +3597,11 @@
fail_cnt:
counter_u64_add(zone->uz_fails, 1);
fail:
- if (zone->uz_max_items > 0) {
- ZONE_LOCK(zone);
- /* XXX Decrement without wakeup */
- zone->uz_items--;
- ZONE_UNLOCK(zone);
- }
+ if (zone->uz_max_items > 0)
+ zone_free_limit(zone, 1);
CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
zone->uz_name, zone);
+
return (NULL);
}
@@ -3832,14 +3947,8 @@
counter_u64_add(zone->uz_frees, 1);
- if (zone->uz_max_items > 0) {
- ZONE_LOCK(zone);
- zone->uz_items--;
- if (zone->uz_sleepers > 0 &&
- zone->uz_items < zone->uz_max_items)
- wakeup_one(zone);
- ZONE_UNLOCK(zone);
- }
+ if (zone->uz_max_items > 0)
+ zone_free_limit(zone, 1);
}
/* See uma.h */
@@ -3849,6 +3958,11 @@
struct uma_bucket_zone *ubz;
int count;
+ /*
+ * XXX This can misbehave if the zone has any allocations with
+ * no limit and a limit is imposed. There is currently no
+ * way to clear a limit.
+ */
ZONE_LOCK(zone);
ubz = bucket_zone_max(zone, nitems);
count = ubz != NULL ? ubz->ubz_entries : 0;
@@ -3858,6 +3972,8 @@
zone->uz_max_items = nitems;
zone->uz_flags |= UMA_ZFLAG_LIMIT;
zone_update_caches(zone);
+ /* We may need to wake waiters. */
+ wakeup(&zone->uz_max_items);
ZONE_UNLOCK(zone);
return (nitems);
@@ -4416,6 +4532,7 @@
struct sbuf sbuf;
uma_keg_t kz;
uma_zone_t z;
+ uint64_t items;
int count, error, i;
error = sysctl_wire_old_buffer(req, 0);
@@ -4452,10 +4569,11 @@
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
uth.uth_rsize = kz->uk_rsize;
- if (z->uz_max_items > 0)
- uth.uth_pages = (z->uz_items / kz->uk_ipers) *
+ if (z->uz_max_items > 0) {
+ items = UZ_ITEMS_COUNT(z->uz_items);
+ uth.uth_pages = (items / kz->uk_ipers) *
kz->uk_ppera;
- else
+ } else
uth.uth_pages = kz->uk_pages;
uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) *
kz->uk_ppera;
@@ -4587,6 +4705,16 @@
avail *= mp_maxid + 1;
effpct = 100 * avail / total;
return (sysctl_handle_int(oidp, &effpct, 0, req));
+}
+
+static int
+sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS)
+{
+ uma_zone_t zone = arg1;
+ uint64_t cur;
+
+ cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items));
+ return (sysctl_handle_64(oidp, &cur, 0, req));
}
#ifdef INVARIANTS
Index: head/sys/vm/uma_int.h
===================================================================
--- head/sys/vm/uma_int.h
+++ head/sys/vm/uma_int.h
@@ -406,10 +406,7 @@
typedef struct uma_zone_domain * uma_zone_domain_t;
/*
- * Zone management structure
- *
- * TODO: Optimize for cache line size
- *
+ * Zone structure - per memory type.
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
@@ -422,9 +419,9 @@
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
- uint64_t uz_items; /* Total items count */
+ uint64_t uz_spare0;
uint64_t uz_max_items; /* Maximum number of items to alloc */
- uint32_t uz_sleepers; /* Number of sleepers on memory */
+ uint32_t uz_sleepers; /* Threads sleeping on limit */
uint16_t uz_bucket_size; /* Number of items in full bucket */
uint16_t uz_bucket_size_max; /* Maximum number of bucket items */
@@ -434,7 +431,7 @@
void *uz_arg; /* Import/release argument. */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Finalizer for each item. */
- void *uz_spare;
+ void *uz_spare1;
uint64_t uz_bkt_count; /* Items in bucket cache */
uint64_t uz_bkt_max; /* Maximum bucket cache size */
@@ -459,6 +456,8 @@
counter_u64_t uz_fails; /* Total number of alloc failures */
uint64_t uz_sleeps; /* Total number of alloc sleeps */
uint64_t uz_xdomain; /* Total number of cross-domain frees */
+ volatile uint64_t uz_items; /* Total items count & sleepers */
+
char *uz_ctlname; /* sysctl safe name string. */
struct sysctl_oid *uz_oid; /* sysctl oid pointer. */
int uz_namecnt; /* duplicate name count. */
@@ -514,6 +513,17 @@
"\3STATIC" \
"\2ZINIT" \
"\1PAGEABLE"
+
+/*
+ * Macros for interpreting the uz_items field. 20 bits of sleeper count
+ * and 44 bit of item count.
+ */
+#define UZ_ITEMS_SLEEPER_SHIFT 44LL
+#define UZ_ITEMS_SLEEPERS_MAX ((1 << (64 - UZ_ITEMS_SLEEPER_SHIFT)) - 1)
+#define UZ_ITEMS_COUNT_MASK ((1LL << UZ_ITEMS_SLEEPER_SHIFT) - 1)
+#define UZ_ITEMS_COUNT(x) ((x) & UZ_ITEMS_COUNT_MASK)
+#define UZ_ITEMS_SLEEPERS(x) ((x) >> UZ_ITEMS_SLEEPER_SHIFT)
+#define UZ_ITEMS_SLEEPER (1LL << UZ_ITEMS_SLEEPER_SHIFT)
#undef UMA_ALIGN
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Nov 17, 8:00 AM (21 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14672939
Default Alt Text
D22827.diff (17 KB)
Attached To
Mode
D22827: (umaperf 3/7) atomic zone limits
Attached
Detach File
Event Timeline
Log In to Comment