D40403: vm_page: Implement lazy page initialization
D40403.id133461.diff
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -94,14 +94,16 @@
#define VM_PHYSSEG_MAX 63
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
- * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
- * the pool from which physical pages for page tables and small UMA
- * objects are allocated.
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool from
+ * which physical pages are allocated and VM_FREEPOOL_DIRECT is the pool from
+ * which physical pages for page tables and small UMA objects are allocated.
+ * VM_FREEPOOL_LAZYINIT is a special-purpose pool that is populated only during
+ * boot and is used to implement deferred initialization of page structures.
*/
-#define VM_NFREEPOOL 2
-#define VM_FREEPOOL_DEFAULT 0
-#define VM_FREEPOOL_DIRECT 1
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_LAZYINIT 0
+#define VM_FREEPOOL_DEFAULT 1
+#define VM_FREEPOOL_DIRECT 2
/*
* Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -73,14 +73,16 @@
#define VM_PHYSSEG_MAX 64
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
- * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
- * the pool from which physical pages for small UMA objects are
- * allocated.
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool from
+ * which physical pages are allocated and VM_FREEPOOL_DIRECT is the pool from
+ * which physical pages for page tables and small UMA objects are allocated.
+ * VM_FREEPOOL_LAZYINIT is a special-purpose pool that is populated only during
+ * boot and is used to implement deferred initialization of page structures.
*/
-#define VM_NFREEPOOL 2
-#define VM_FREEPOOL_DEFAULT 0
-#define VM_FREEPOOL_DIRECT 1
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_LAZYINIT 0
+#define VM_FREEPOOL_DEFAULT 1
+#define VM_FREEPOOL_DIRECT 2
/*
* Create two free page lists: VM_FREELIST_DMA32 is for physical pages that have
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -333,9 +333,9 @@
if (m == NULL)
return (true); /* page does not exist, no failure */
- vmd = vm_pagequeue_domain(m);
+ vmd = VM_DOMAIN(vm_phys_domain(pa));
vm_domain_free_lock(vmd);
- found = vm_phys_unfree_page(m);
+ found = vm_phys_unfree_page(pa);
vm_domain_free_unlock(vmd);
if (found) {
vm_domain_freecnt_inc(vmd, -1);
@@ -568,6 +568,9 @@
#if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
long ii;
#endif
+#ifdef VM_FREEPOOL_LAZYINIT
+ int lazyinit;
+#endif
vaddr = round_page(vaddr);
@@ -750,6 +753,11 @@
*/
vm_phys_init();
+#ifdef VM_FREEPOOL_LAZYINIT
+ lazyinit = 1;
+ TUNABLE_INT_FETCH("debug.vm.lazy_page_init", &lazyinit);
+#endif
+
/*
* Initialize the page structures and add every available page to the
* physical memory allocator's free lists.
@@ -765,9 +773,50 @@
vm_cnt.v_page_count = 0;
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
- for (m = seg->first_page, pa = seg->start; pa < seg->end;
- m++, pa += PAGE_SIZE)
- vm_page_init_page(m, pa, segind, VM_FREEPOOL_DEFAULT);
+
+ /*
+ * If lazy vm_page initialization is not enabled, simply
+ * initialize all of the pages in the segment. Otherwise, we
+ * only initialize:
+ * 1. Pages not covered by phys_avail[], since they might be
+ * freed to the allocator at some future point, e.g., by
+ * kmem_bootstrap_free().
+ * 2. The first page of each run of free pages handed to the
+ * vm_phys allocator, which in turn defers initialization
+ * of pages until they are needed.
+ * This avoids blocking the boot process for long periods, which
+ * may be relevant for VMs (which ought to boot as quickly as
+ * possible) and/or systems with large amounts of physical
+ * memory.
+ */
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (lazyinit) {
+ startp = seg->start;
+ for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+ if (startp >= seg->end)
+ break;
+
+ if (phys_avail[i + 1] < startp)
+ continue;
+ if (phys_avail[i] <= startp) {
+ startp = phys_avail[i + 1];
+ continue;
+ }
+
+ m = &seg->first_page[atop(startp - seg->start)];
+ for (endp = MIN(phys_avail[i], seg->end);
+ startp < endp; startp += PAGE_SIZE, m++) {
+ vm_page_init_page(m, startp, segind,
+ VM_FREEPOOL_DEFAULT);
+ }
+ }
+ } else
+#endif
+ for (m = seg->first_page, pa = seg->start;
+ pa < seg->end; m++, pa += PAGE_SIZE) {
+ vm_page_init_page(m, pa, segind,
+ VM_FREEPOOL_DEFAULT);
+ }
/*
* Add the segment's pages that are covered by one of
@@ -785,6 +834,12 @@
continue;
m = seg->first_page + atop(startp - seg->start);
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (lazyinit) {
+ vm_page_init_page(m, startp, segind,
+ VM_FREEPOOL_LAZYINIT);
+ }
+#endif
vmd = VM_DOMAIN(seg->domain);
vm_domain_free_lock(vmd);
vm_phys_enqueue_contig(m, pagecount);
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -79,7 +79,7 @@
vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
void vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
int *locality);
-bool vm_phys_unfree_page(vm_page_t m);
+bool vm_phys_unfree_page(vm_paddr_t pa);
int vm_phys_mem_affinity(int f, int t);
void vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end);
vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -47,14 +47,18 @@
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
+#include <sys/tslog.h>
+#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <ddb/ddb.h>
@@ -141,6 +145,7 @@
* Provides the mapping from VM_FREELIST_* to free list indices (flind).
*/
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];
+static int __read_mostly vm_default_freepool;
CTASSERT(VM_FREELIST_DEFAULT == 0);
@@ -184,6 +189,16 @@
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
int order, int tail);
+static bool __diagused
+vm_phys_pool_valid(int pool)
+{
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (pool == VM_FREEPOOL_LAZYINIT)
+ return (false);
+#endif
+ return (pool >= 0 && pool < VM_NFREEPOOL);
+}
+
/*
* Red-black tree helpers for vm fictitious range management.
*/
@@ -620,6 +635,12 @@
}
}
+#ifdef VM_FREEPOOL_LAZYINIT
+ vm_default_freepool = VM_FREEPOOL_LAZYINIT;
+#else
+ vm_default_freepool = VM_FREEPOOL_DEFAULT;
+#endif
+
rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
@@ -690,6 +711,17 @@
("%s: invalid order %d", __func__, order));
vm_freelist_add(fl, m, order, tail);
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
+ vm_page_t m_next;
+ int npages;
+
+ npages = 1 << order;
+ m_next = m + npages;
+ vm_page_init_page(m_next, m->phys_addr + ptoa(npages), m->segind,
+ VM_FREEPOOL_LAZYINIT);
+ }
+#endif
}
/*
@@ -761,15 +793,33 @@
}
/*
- * Set the pool for a contiguous, power of two-sized set of physical pages.
+ * Set the pool for a contiguous, power of two-sized set of physical pages.
+ *
+ * If the pages currently belong to the lazy init pool, then the corresponding
+ * page structures must be initialized. In this case it is assumed that the
+ * first page in the run has already been initialized.
*/
static void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
- vm_page_t m_tmp;
-
- for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
- m_tmp->pool = pool;
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
+ vm_paddr_t pa;
+ int segind;
+
+ m->pool = pool;
+
+ TSENTER();
+ pa = m->phys_addr + PAGE_SIZE;
+ segind = m->segind;
+ for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
+ m_tmp++, pa += PAGE_SIZE)
+ vm_page_init_page(m_tmp, pa, segind, pool);
+ TSEXIT();
+ } else
+#endif
+ for (vm_page_t m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
+ m_tmp->pool = pool;
}
/*
@@ -793,7 +843,7 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("vm_phys_alloc_npages: domain %d is out of range", domain));
- KASSERT(pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(pool),
("vm_phys_alloc_npages: pool %d is out of range", pool));
KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
("vm_phys_alloc_npages: npages %d is out of range", npages));
@@ -822,7 +872,8 @@
}
}
for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ for (pind = vm_default_freepool; pind < VM_NFREEPOOL;
+ pind++) {
alt = vm_phys_free_queues[domain][flind][pind];
while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
NULL) {
@@ -889,7 +940,7 @@
KASSERT(freelist < VM_NFREELIST,
("vm_phys_alloc_freelist_pages: freelist %d is out of range",
freelist));
- KASSERT(pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(pool),
("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_freelist_pages: order %d is out of range", order));
@@ -918,7 +969,7 @@
* use them to satisfy the allocation.
*/
for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
alt = &vm_phys_free_queues[domain][flind][pind][0];
m = TAILQ_FIRST(&alt[oind].pl);
if (m != NULL) {
@@ -1158,7 +1209,7 @@
KASSERT(m->order == VM_NFREEORDER,
("vm_phys_free_pages: page %p has unexpected order %d",
m, m->order));
- KASSERT(m->pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(m->pool),
("vm_phys_free_pages: page %p has unexpected pool %d",
m, m->pool));
KASSERT(order < VM_NFREEORDER,
@@ -1187,6 +1238,107 @@
vm_freelist_add(fl, m, order, 1);
}
+#ifdef VM_FREEPOOL_LAZYINIT
+/*
+ * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving
+ * them to the default pool. This is a prerequisite for some rare operations
+ * which need to scan the page array and thus depend on all pages being
+ * initialized.
+ */
+static void
+vm_phys_lazy_init_domain(int domain, bool locked)
+{
+ static bool initdone[MAXMEMDOM];
+ struct vm_domain *vmd;
+ struct vm_freelist *fl;
+ vm_page_t m;
+ int pind;
+ bool unlocked;
+
+ if (__predict_true(atomic_load_bool(&initdone[domain])))
+ return;
+
+ vmd = VM_DOMAIN(domain);
+ if (locked)
+ vm_domain_free_assert_locked(vmd);
+ else
+ vm_domain_free_lock(vmd);
+ if (atomic_load_bool(&initdone[domain]))
+ goto out;
+ pind = VM_FREEPOOL_LAZYINIT;
+ for (int freelist = 0; freelist < VM_NFREELIST; freelist++) {
+ int flind;
+
+ flind = vm_freelist_to_flind[freelist];
+ if (flind < 0)
+ continue;
+ fl = vm_phys_free_queues[domain][flind][pind];
+ for (int oind = 0; oind < VM_NFREEORDER; oind++) {
+ if (atomic_load_int(&fl[oind].lcnt) == 0)
+ continue;
+ while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
+ /*
+ * Avoid holding the lock across the
+ * initialization unless there's a free page
+ * shortage.
+ */
+ vm_freelist_rem(fl, m, oind);
+ unlocked = vm_domain_allocate(vmd,
+ VM_ALLOC_NORMAL, 1 << oind);
+ if (unlocked)
+ vm_domain_free_unlock(vmd);
+ vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
+ if (unlocked) {
+ vm_domain_freecnt_inc(vmd, 1 << oind);
+ vm_domain_free_lock(vmd);
+ }
+ vm_phys_free_pages(m, oind);
+ }
+ }
+ }
+ atomic_store_bool(&initdone[domain], true);
+out:
+ if (!locked)
+ vm_domain_free_unlock(vmd);
+}
+
+static void
+vm_phys_lazy_init(void)
+{
+ for (int domain = 0; domain < vm_ndomains; domain++)
+ vm_phys_lazy_init_domain(domain, false);
+ atomic_store_int(&vm_default_freepool, VM_FREEPOOL_DEFAULT);
+}
+
+static void
+vm_phys_lazy_init_kthr(void *arg __unused)
+{
+ vm_phys_lazy_init();
+ kthread_exit();
+}
+
+static void
+vm_phys_lazy_sysinit(void *arg __unused)
+{
+ struct thread *td;
+ int error;
+
+ error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td,
+ RFSTOPPED, 0, "vmlazyinit");
+ if (error == 0) {
+ thread_lock(td);
+ sched_prio(td, PRI_MIN_IDLE);
+ sched_add(td, SRQ_BORING);
+ } else {
+ printf("%s: could not create lazy init thread: %d\n",
+ __func__, error);
+ vm_phys_lazy_init();
+ }
+}
+SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit,
+ NULL);
+#endif /* VM_FREEPOOL_LAZYINIT */
+
/*
* Free a contiguous, arbitrarily sized set of physical pages, without
* merging across set boundaries.
@@ -1292,6 +1444,12 @@
pa_end = MIN(high, seg->end);
if (pa_end - pa_start < ptoa(npages))
continue;
+#ifdef VM_FREEPOOL_LAZYINIT
+ /*
+ * The pages on the free lists must be initialized.
+ */
+ vm_phys_lazy_init_domain(domain, false);
+#endif
bounds[0] = vm_phys_seg_paddr_to_vm_page(seg, pa_start);
bounds[1] = vm_phys_seg_paddr_to_vm_page(seg, pa_end);
return (seg - vm_phys_segs);
@@ -1307,21 +1465,30 @@
* The free page queues must be locked.
*/
bool
-vm_phys_unfree_page(vm_page_t m)
+vm_phys_unfree_page(vm_paddr_t pa)
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
- vm_paddr_t pa, pa_half;
- vm_page_t m_set, m_tmp;
+ vm_paddr_t pa_half;
+ vm_page_t m, m_set, m_tmp;
int order;
+ seg = vm_phys_paddr_to_seg(pa);
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
+
+ /*
+ * The pages on the free lists must be initialized.
+ */
+#ifdef VM_FREEPOOL_LAZYINIT
+ vm_phys_lazy_init_domain(seg->domain, true);
+#endif
+
/*
* First, find the contiguous, power of two-sized set of free
* physical pages containing the given physical page "m" and
* assign it to "m_set".
*/
- seg = &vm_phys_segs[m->segind];
- vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
+ m = vm_phys_paddr_to_vm_page(pa);
for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
order < VM_NFREEORDER - 1; ) {
order++;
@@ -1460,7 +1627,7 @@
/* Search for a large enough free block. */
size = npages << PAGE_SHIFT;
for (oind = order; oind < VM_NFREEORDER; oind++) {
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
fl = (*queues)[pind];
TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
/*
@@ -1480,7 +1647,7 @@
if (order < VM_NFREEORDER)
return (NULL);
/* Search for a long-enough sequence of max-order blocks. */
- for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ for (pind = vm_default_freepool; pind < VM_NFREEPOOL; pind++) {
fl = (*queues)[pind];
m_ret = vm_phys_find_freelist_contig(fl, npages,
low, high, alignment, boundary);
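Usage sketch (not part of the patch): the diff above fetches a loader tunable, debug.vm.lazy_page_init, via TUNABLE_INT_FETCH() with a default of 1, so lazy initialization is enabled unless overridden. Assuming that default, the deferred path can be disabled for debugging by setting the tunable at boot in /boot/loader.conf:

    debug.vm.lazy_page_init=0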