D40403.id122777.diff

diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -97,14 +97,16 @@
#define VM_PHYSSEG_MAX 63
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
- * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
- * the pool from which physical pages for page tables and small UMA
- * objects are allocated.
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool from
+ * which physical pages are allocated and VM_FREEPOOL_DIRECT is the pool from
+ * which physical pages for page tables and small UMA objects are allocated.
+ * VM_FREEPOOL_LAZYINIT is a special-purpose pool that is populated only during
+ * boot and is used to implement deferred initialization of page structures.
*/
-#define VM_NFREEPOOL 2
-#define VM_FREEPOOL_DEFAULT 0
-#define VM_FREEPOOL_DIRECT 1
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_LAZYINIT 0
+#define VM_FREEPOOL_DEFAULT 1
+#define VM_FREEPOOL_DIRECT 2
/*
* Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -72,14 +72,16 @@
#define VM_PHYSSEG_MAX 64
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
- * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
- * the pool from which physical pages for small UMA objects are
- * allocated.
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool from
+ * which physical pages are allocated and VM_FREEPOOL_DIRECT is the pool from
+ * which physical pages for page tables and small UMA objects are allocated.
+ * VM_FREEPOOL_LAZYINIT is a special-purpose pool that is populated only during
+ * boot and is used to implement deferred initialization of page structures.
*/
-#define VM_NFREEPOOL 2
-#define VM_FREEPOOL_DEFAULT 0
-#define VM_FREEPOOL_DIRECT 1
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_LAZYINIT 0
+#define VM_FREEPOOL_DEFAULT 1
+#define VM_FREEPOOL_DIRECT 2
/*
* Create one free page list: VM_FREELIST_DEFAULT is for all physical
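
[Annotation] For orientation: the pool constants index one dimension of the per-domain buddy free queues, so raising VM_NFREEPOOL from two to three simply adds a third column of free lists for each (domain, freelist, order) tuple. Below is a minimal userland model of that layout; the sizes and the names NDOM, NLIST, NORDER, and struct freelist are invented for illustration and are not the kernel's declarations.

    #include <stdio.h>

    #define NDOM    2       /* stand-in for MAXMEMDOM */
    #define NLIST   2       /* stand-in for VM_NFREELIST */
    #define NPOOL   3       /* VM_NFREEPOOL after this change */
    #define NORDER  13      /* stand-in for VM_NFREEORDER */

    enum { POOL_LAZYINIT, POOL_DEFAULT, POOL_DIRECT };

    struct freelist {
            int lcnt;       /* the page list itself is omitted here */
    };

    static struct freelist queues[NDOM][NLIST][NPOOL][NORDER];

    int main(void)
    {
            /* The lazy-init pool is just one more column of queues. */
            queues[0][0][POOL_LAZYINIT][NORDER - 1].lcnt = 1;
            printf("lazy pages queued: %d\n",
                queues[0][0][POOL_LAZYINIT][NORDER - 1].lcnt);
            return (0);
    }
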
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -333,9 +333,9 @@
if (m == NULL)
return (true); /* page does not exist, no failure */
- vmd = vm_pagequeue_domain(m);
+ vmd = VM_DOMAIN(vm_phys_domain(pa));
vm_domain_free_lock(vmd);
- found = vm_phys_unfree_page(m);
+ found = vm_phys_unfree_page(pa);
vm_domain_free_unlock(vmd);
if (found) {
vm_domain_freecnt_inc(vmd, -1);
@@ -765,9 +765,12 @@
vm_cnt.v_page_count = 0;
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
+
+#ifndef VM_FREEPOOL_LAZYINIT
for (m = seg->first_page, pa = seg->start; pa < seg->end;
m++, pa += PAGE_SIZE)
vm_page_init_page(m, pa, segind, VM_FREEPOOL_DEFAULT);
+#endif
/*
* Add the segment's pages that are covered by one of
@@ -785,6 +788,10 @@
continue;
m = seg->first_page + atop(startp - seg->start);
+#ifdef VM_FREEPOOL_LAZYINIT
+ vm_page_init_page(m, startp, segind,
+ VM_FREEPOOL_LAZYINIT);
+#endif
vmd = VM_DOMAIN(seg->domain);
vm_domain_free_lock(vmd);
vm_phys_enqueue_contig(m, pagecount);
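
[Annotation] This vm_page.c change carries the core idea: rather than initializing every vm_page structure at boot, only the first page of each contiguous free run is initialized and tagged VM_FREEPOOL_LAZYINIT, and the remaining structures are filled in when the run is first carved up (see vm_phys_set_pool() below). A self-contained userland sketch of that scheme; the names boot_init_run and set_pool are invented for this model.

    #include <stdbool.h>
    #include <stdio.h>

    #define NPAGES  16

    enum pool { POOL_LAZYINIT, POOL_DEFAULT };

    struct page {
            bool initialized;
            enum pool pool;
    };

    static struct page pages[NPAGES];

    /* Boot: tag only the run's first page; the rest stay untouched. */
    static void
    boot_init_run(int start)
    {
            pages[start].initialized = true;
            pages[start].pool = POOL_LAZYINIT;
    }

    /* First allocation from the run initializes the remaining pages. */
    static void
    set_pool(int start, int npages, enum pool pool)
    {
            bool lazy = (pages[start].pool == POOL_LAZYINIT);

            for (int i = start; i < start + npages; i++) {
                    if (lazy)
                            pages[i].initialized = true;
                    pages[i].pool = pool;
            }
    }

    int main(void)
    {
            boot_init_run(0);                       /* O(1) at boot */
            set_pool(0, NPAGES, POOL_DEFAULT);      /* deferred cost */
            printf("page 5 initialized: %d\n", pages[5].initialized);
            return (0);
    }
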
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -42,6 +42,8 @@
#ifdef _KERNEL
+#include <vm/_vm_phys.h>
+
extern vm_paddr_t phys_avail[];
/* Domains must be dense (non-sparse) and zero-based. */
@@ -79,7 +81,7 @@
int *locality);
vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary, int options);
-bool vm_phys_unfree_page(vm_page_t m);
+bool vm_phys_unfree_page(vm_paddr_t pa);
int vm_phys_mem_affinity(int f, int t);
void vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end);
vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
@@ -106,5 +108,19 @@
#endif
}
+static inline struct vm_phys_seg *
+vm_phys_seg(vm_paddr_t pa)
+{
+ struct vm_phys_seg *seg;
+ int segind;
+
+ for (segind = 0; segind < vm_phys_nsegs; segind++) {
+ seg = &vm_phys_segs[segind];
+ if (pa >= seg->start && pa < seg->end)
+ return (seg);
+ }
+ return (NULL);
+}
+
#endif /* _KERNEL */
#endif /* !_VM_PHYS_H_ */
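
[Annotation] The new inline vm_phys_seg() resolves a physical address to its segment by linear scan, which is acceptable because the segment count is small (bounded by VM_PHYSSEG_MAX). A standalone model with an invented segment table follows; note that the lookup can return NULL, so callers such as vm_phys_unfree_page() are expected to pass only addresses that the allocator manages.

    #include <stdio.h>

    typedef unsigned long long paddr_t;     /* stand-in for vm_paddr_t */

    struct seg {
            paddr_t start, end;
            int domain;
    };

    static struct seg segs[] = {
            { 0x0000000ULL, 0x1000000ULL, 0 },
            { 0x2000000ULL, 0x3000000ULL, 1 },
    };
    static const int nsegs = sizeof(segs) / sizeof(segs[0]);

    /* Linear scan, mirroring the diff's vm_phys_seg(). */
    static struct seg *
    seg_lookup(paddr_t pa)
    {
            for (int i = 0; i < nsegs; i++)
                    if (pa >= segs[i].start && pa < segs[i].end)
                            return (&segs[i]);
            return (NULL);  /* pa is not managed by the allocator */
    }

    int main(void)
    {
            struct seg *s = seg_lookup(0x2000800ULL);

            printf("domain: %d\n", s != NULL ? s->domain : -1);
            return (0);
    }
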
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -49,14 +49,18 @@
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
+#include <sys/tslog.h>
+#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <ddb/ddb.h>
@@ -178,6 +182,16 @@
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
int order, int tail);
+static bool
+vm_phys_pool_valid(int pool)
+{
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (pool == VM_FREEPOOL_LAZYINIT)
+ return (false);
+#endif
+ return (pool >= 0 && pool < VM_NFREEPOOL);
+}
+
/*
* Red-black tree helpers for vm fictitious range management.
*/
@@ -716,15 +730,33 @@
}
/*
- * Set the pool for a contiguous, power of two-sized set of physical pages.
+ * Set the pool for a contiguous, power of two-sized set of physical pages.
+ *
+ * If the pages currently belong to the lazy init pool, then the corresponding
+ * page structures must be initialized. In this case it is assumed that the
+ * first page in the run has already been initialized.
*/
static void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
- vm_page_t m_tmp;
-
- for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
- m_tmp->pool = pool;
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
+ vm_paddr_t pa;
+ int segind;
+
+ m->pool = pool;
+
+ TSENTER();
+ pa = m->phys_addr + PAGE_SIZE;
+ segind = m->segind;
+ for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
+ m_tmp++, pa += PAGE_SIZE)
+ vm_page_init_page(m_tmp, pa, segind, pool);
+ TSEXIT();
+ } else
+#endif
+ for (vm_page_t m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
+ m_tmp->pool = pool;
}
/*
@@ -748,7 +780,7 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("vm_phys_alloc_npages: domain %d is out of range", domain));
- KASSERT(pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(pool),
("vm_phys_alloc_npages: pool %d is out of range", pool));
KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
("vm_phys_alloc_npages: npages %d is out of range", npages));
@@ -847,7 +879,7 @@
KASSERT(freelist < VM_NFREELIST,
("vm_phys_alloc_freelist_pages: freelist %d is out of range",
freelist));
- KASSERT(pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(pool),
("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
KASSERT(order < VM_NFREEORDER,
("vm_phys_alloc_freelist_pages: order %d is out of range", order));
@@ -1107,7 +1139,7 @@
KASSERT(m->order == VM_NFREEORDER,
("vm_phys_free_pages: page %p has unexpected order %d",
m, m->order));
- KASSERT(m->pool < VM_NFREEPOOL,
+ KASSERT(vm_phys_pool_valid(m->pool),
("vm_phys_free_pages: page %p has unexpected pool %d",
m, m->pool));
KASSERT(order < VM_NFREEORDER,
@@ -1136,6 +1168,103 @@
vm_freelist_add(fl, m, order, 1);
}
+#ifdef VM_FREEPOOL_LAZYINIT
+/*
+ * Initialize all pages lingering in the lazy init pool of a NUMA domain, moving
+ * them to the default pool. This is a prerequisite for some rare operations
+ * which need to scan the page array and thus depend on all pages being
+ * initialized.
+ */
+static void
+vm_phys_lazy_init(int domain, bool locked)
+{
+ static bool initdone[MAXMEMDOM];
+ struct vm_domain *vmd;
+ struct vm_freelist *fl;
+ vm_page_t m;
+ int pind;
+ bool unlocked;
+
+ if (atomic_load_bool(&initdone[domain]))
+ return;
+
+ vmd = VM_DOMAIN(domain);
+ if (locked)
+ vm_domain_free_assert_locked(vmd);
+ else
+ vm_domain_free_lock(vmd);
+ if (atomic_load_bool(&initdone[domain]))
+ goto out;
+ pind = VM_FREEPOOL_LAZYINIT;
+ for (int freelist = 0; freelist < VM_NFREELIST; freelist++) {
+ int flind;
+
+ flind = vm_freelist_to_flind[freelist];
+ if (flind < 0)
+ continue;
+ fl = vm_phys_free_queues[domain][flind][pind];
+ for (int oind = 0; oind < VM_NFREEORDER; oind++) {
+ if (atomic_load_int(&fl[oind].lcnt) == 0)
+ continue;
+ while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
+ /*
+ * Avoid holding the lock across the
+ * initialization unless there's a free page
+ * shortage.
+ */
+ vm_freelist_rem(fl, m, oind);
+ unlocked = vm_domain_allocate(vmd,
+ VM_ALLOC_NORMAL, 1 << oind);
+ if (unlocked)
+ vm_domain_free_unlock(vmd);
+ vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m,
+ oind);
+ if (unlocked) {
+ vm_domain_freecnt_inc(vmd,
+ 1 << oind);
+ vm_domain_free_lock(vmd);
+ }
+ vm_phys_free_pages(m, oind);
+ }
+ }
+ }
+ atomic_store_bool(&initdone[domain], true);
+out:
+ if (!locked)
+ vm_domain_free_unlock(vmd);
+}
+
+static void
+vm_phys_lazy_init_kthr(void *arg __unused)
+{
+ for (int domain = 0; domain < vm_ndomains; domain++)
+ vm_phys_lazy_init(domain, false);
+ kthread_exit();
+}
+
+static void
+vm_phys_lazy_sysinit(void *arg __unused)
+{
+ struct thread *td;
+ int error;
+
+ error = kthread_add(vm_phys_lazy_init_kthr, NULL, curproc, &td,
+ RFSTOPPED, 0, "vmlazyinit");
+ if (error == 0) {
+ thread_lock(td);
+ sched_prio(td, PRI_MIN_IDLE);
+ sched_add(td, SRQ_BORING);
+ } else {
+ printf("%s: could not create lazy init thread: %d\n",
+ __func__, error);
+ for (int domain = 0; domain < vm_ndomains; domain++)
+ vm_phys_lazy_init(domain, false);
+ }
+}
+SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit,
+ NULL);
+#endif /* VM_FREEPOOL_LAZYINIT */
+
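
[Annotation] vm_phys_lazy_init() wraps the one-time drain in a double-checked pattern: an unlocked atomic test of initdone keeps the steady-state cost near zero, and a second test under the per-domain free lock serializes the drain itself. A self-contained pthreads model of just that pattern; the kernel uses vm_domain_free_lock(), not a mutex, and the names here are invented.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool initdone;
    static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    lazy_init_once(void)
    {
            if (atomic_load(&initdone))     /* fast path: already drained */
                    return;
            pthread_mutex_lock(&free_lock);
            if (!atomic_load(&initdone)) {  /* re-check under the lock */
                    /* ... move pages out of the lazy-init queues ... */
                    atomic_store(&initdone, true);
            }
            pthread_mutex_unlock(&free_lock);
    }

    int main(void)
    {
            lazy_init_once();
            lazy_init_once();       /* second call returns immediately */
            printf("drained: %d\n", atomic_load(&initdone));
            return (0);
    }
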
/*
* Return the largest possible order of a set of pages starting at m.
*/
@@ -1157,6 +1286,7 @@
static vm_page_t
vm_phys_enqueue_contig_chunk(struct vm_freelist *fl, vm_page_t m, int order)
{
+ vm_page_t m_ret;
int npages;
KASSERT(order >= 0 && order < VM_NFREEORDER,
@@ -1164,7 +1294,17 @@
npages = 1 << order;
vm_freelist_add(fl, m, order, 1);
- return (m + npages);
+ m_ret = m + npages;
+#ifdef VM_FREEPOOL_LAZYINIT
+ if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
+ vm_paddr_t pa;
+
+ pa = m->phys_addr;
+ vm_page_init_page(m_ret, pa + ptoa(npages), m->segind,
+ VM_FREEPOOL_LAZYINIT);
+ }
+#endif
+ return (m_ret);
}
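
[Annotation] vm_freelist_add() records run metadata only in a chunk's first page, so when a lazily initialized run is carved into power-of-two chunks, the page just past each chunk becomes the next chunk's header and must be materialized before it is touched; that is what the new vm_page_init_page() call above does. A minimal model, assuming the run extends past the enqueued chunk; all names are invented.

    #include <stdbool.h>
    #include <stdio.h>

    enum pool { POOL_LAZYINIT, POOL_DEFAULT };

    struct page {
            bool initialized;
            enum pool pool;
    };

    static void
    init_page(struct page *p, enum pool pool)
    {
            p->initialized = true;
            p->pool = pool;
    }

    /*
     * Enqueue a 2^order chunk whose first page is already a valid
     * header, then return the next chunk's start.  For a lazy run the
     * next header is materialized before the caller touches it; the
     * caller guarantees the run extends past this chunk.
     */
    static struct page *
    enqueue_chunk(struct page *m, int order)
    {
            struct page *m_next = m + (1 << order);

            /* The real code calls vm_freelist_add(fl, m, order, 1) here. */
            if (m->pool == POOL_LAZYINIT)
                    init_page(m_next, POOL_LAZYINIT);
            return (m_next);
    }

    int main(void)
    {
            static struct page run[8];
            struct page *next;

            init_page(&run[0], POOL_LAZYINIT);      /* run header only */
            next = enqueue_chunk(run, 2);
            printf("next header ready: %d\n", next->initialized);
            return (0);
    }
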
/*
@@ -1284,6 +1424,14 @@
pa_end = high <= seg->end ? high : seg->end;
if (pa_end - pa_start < ptoa(npages))
continue;
+
+#ifdef VM_FREEPOOL_LAZYINIT
+ /*
+ * The pages on the free lists must be initialized.
+ */
+ vm_phys_lazy_init(domain, false);
+#endif
+
m_start = &seg->first_page[atop(pa_start - seg->start)];
m_end = &seg->first_page[atop(pa_end - seg->start)];
m_run = vm_page_scan_contig(npages, m_start, m_end,
@@ -1302,21 +1450,30 @@
* The free page queues must be locked.
*/
bool
-vm_phys_unfree_page(vm_page_t m)
+vm_phys_unfree_page(vm_paddr_t pa)
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
- vm_paddr_t pa, pa_half;
- vm_page_t m_set, m_tmp;
+ vm_paddr_t pa_half;
+ vm_page_t m, m_set, m_tmp;
int order;
+ seg = vm_phys_seg(pa);
+ vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
+
+ /*
+ * The pages on the free lists must be initialized.
+ */
+#ifdef VM_FREEPOOL_LAZYINIT
+ vm_phys_lazy_init(seg->domain, true);
+#endif
+
/*
* First, find the contiguous, power of two-sized set of free
* physical pages containing the given physical page "m" and
* assign it to "m_set".
*/
- seg = &vm_phys_segs[m->segind];
- vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
+ m = vm_phys_paddr_to_vm_page(pa);
for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
order < VM_NFREEORDER - 1; ) {
order++;
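
[Annotation] Once the lazy pool for the segment's domain has been drained, every vm_page in the segment is initialized and translating the physical address to its page structure is safe; the kernel does this with vm_phys_paddr_to_vm_page(). A standalone sketch of that translation step with invented names:

    #include <stdio.h>

    typedef unsigned long long paddr_t;
    #define PAGE_SHIFT      12
    #define atop(pa)        ((pa) >> PAGE_SHIFT)

    struct page { int order; };

    struct seg {
            paddr_t start;
            struct page *first_page;
    };

    /*
     * pa -> page translation, safe once every page structure in the
     * segment is known to be initialized.
     */
    static struct page *
    paddr_to_page(struct seg *seg, paddr_t pa)
    {
            return (&seg->first_page[atop(pa - seg->start)]);
    }

    int main(void)
    {
            static struct page pages[4];
            struct seg s = { 0x1000, pages };

            printf("index: %td\n", paddr_to_page(&s, 0x3000) - pages);
            return (0);
    }
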
