D38852.diff
D38852: vm: improve kstack_object pindex calculation scheme to avoid pindex holes
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -366,7 +366,8 @@
struct callout td_slpcallout; /* (h) Callout for sleep. */
struct trapframe *td_frame; /* (k) */
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
- int td_kstack_pages; /* (a) Size of the kstack. */
+ u_short td_kstack_pages; /* (a) Size of the kstack. */
+ u_short td_kstack_domain; /* (a) Domain backing kstack KVA. */
volatile u_int td_critnest; /* (k*) Critical section nest level. */
struct mdthread td_md; /* (k) Any machine-dependent fields. */
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
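
The proc.h hunk narrows td_kstack_pages from int to u_short and adds the new
td_kstack_domain in the space freed, leaving the surrounding struct thread
layout unchanged. A quick standalone check (hypothetical struct names, not
kernel code) shows the two u_shorts occupy the same four bytes the int did:

#include <stdio.h>

struct before { int td_kstack_pages; };
struct after { unsigned short td_kstack_pages; unsigned short td_kstack_domain; };

int
main(void)
{
	/* Both print 4: the pair of u_shorts packs into the old int's slot. */
	printf("before: %zu bytes, after: %zu bytes\n",
	    sizeof(struct before), sizeof(struct after));
	return (0);
}
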
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -127,8 +127,10 @@
void vm_imgact_unmap_page(struct sf_buf *sf);
void vm_thread_dispose(struct thread *td);
int vm_thread_new(struct thread *td, int pages);
-void vm_thread_stack_back(struct domainset *ds, vm_offset_t kaddr,
- vm_page_t ma[], int npages, int req_class);
+vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int npages);
+vm_object_t vm_thread_kstack_size_to_obj(int npages);
+int vm_thread_stack_back(vm_offset_t kaddr, vm_page_t ma[], int npages,
+ int req_class, int domain);
u_int vm_active_count(void);
u_int vm_inactive_count(void);
u_int vm_laundry_count(void);
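
The vm_kstack_pindex() declared above maps a standard-size stack's kernel VA
to a page index in the backing kstack object through a non-identity mapping,
so the guard pages interleaved between stacks leave no holes in the object's
pindex space. The arithmetic is easy to check in isolation; here is a minimal
userspace sketch (not FreeBSD kernel code), assuming kstack_pages = 4 and
KSTACK_GUARD_PAGES = 1 and treating VM_MIN_KERNEL_ADDRESS as zero:

#include <stdio.h>

#define KSTACK_PAGES		4UL	/* assumed amd64 default */
#define KSTACK_GUARD_PAGES	1UL

/*
 * Model of the non-identity mapping in vm_kstack_pindex() for
 * standard-size stacks: subtract the guard pages that precede each
 * stack so backing pages occupy a dense, hole-free pindex range.
 */
static unsigned long
kstack_pindex(unsigned long linear_pindex)
{
	unsigned long span = KSTACK_PAGES + KSTACK_GUARD_PAGES;

	return (linear_pindex -
	    (linear_pindex / span + 1) * KSTACK_GUARD_PAGES);
}

int
main(void)
{
	unsigned long linear, stack;

	/* First backing page of each of the first four stacks. */
	for (stack = 0; stack < 4; stack++) {
		linear = stack * (KSTACK_PAGES + KSTACK_GUARD_PAGES) +
		    KSTACK_GUARD_PAGES;
		printf("stack %lu: linear pindex %2lu -> object pindex %2lu\n",
		    stack, linear, kstack_pindex(linear));
	}
	return (0);
}

Each stack's four pages land in a dense range (0-3, 4-7, 8-11, ...), which is
what lets a single swap object cover every stack with no pindexes wasted on
guard pages.
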
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -96,14 +96,23 @@
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <vm/vm_pagequeue.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
+#include <vm/vm_phys.h>
#include <machine/cpu.h>
+#if VM_NRESERVLEVEL > 0
+#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#else
+#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT)
+#endif
+#define KVA_KSTACK_QUANTUM (1ul << KVA_KSTACK_QUANTUM_SHIFT)
+
/*
* MPSAFE
*
@@ -262,9 +271,11 @@
pmap_sync_icache(map->pmap, va, sz);
}
-vm_object_t kstack_object;
+static vm_object_t kstack_object;
+static vm_object_t kstack_alt_object;
static uma_zone_t kstack_cache;
static int kstack_cache_size;
+static vmem_t *vmd_kstack_arena[MAXMEMDOM];
static int
sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS)
@@ -282,63 +293,218 @@
sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks");
/*
- * Create the kernel stack (including pcb for i386) for a new thread.
+ * Allocate a virtual address range from a domain kstack arena, following
+ * the specified NUMA policy.
*/
static vm_offset_t
-vm_thread_stack_create(struct domainset *ds, int pages)
+vm_thread_alloc_kstack_kva(vm_size_t size, int domain)
{
- vm_page_t ma[KSTACK_MAX_PAGES];
- vm_offset_t ks;
- int i;
+#ifndef __ILP32__
+ int rv;
+ vmem_t *arena;
+ vm_offset_t addr = 0;
+
+ size = round_page(size);
+ /* Allocate from the kernel arena for non-standard kstack sizes. */
+ if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) {
+ arena = vm_dom[domain].vmd_kernel_arena;
+ } else {
+ arena = vmd_kstack_arena[domain];
+ }
+ rv = vmem_alloc(arena, size, M_BESTFIT | M_NOWAIT, &addr);
+ if (rv == ENOMEM)
+ return (0);
+ KASSERT(atop(addr - VM_MIN_KERNEL_ADDRESS) %
+ (kstack_pages + KSTACK_GUARD_PAGES) == 0,
+ ("%s: allocated kstack KVA not aligned to multiple of kstack size",
+ __func__));
+
+ return (addr);
+#else
+ return (kva_alloc(size));
+#endif
+}
+/*
+ * Release a region of kernel virtual memory
+ * allocated from the kstack arena.
+ */
+static __noinline void
+vm_thread_free_kstack_kva(vm_offset_t addr, vm_size_t size, int domain)
+{
+ vmem_t *arena;
+
+ size = round_page(size);
+#ifdef __ILP32__
+ arena = kernel_arena;
+#else
+ arena = vmd_kstack_arena[domain];
+ if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) {
+ arena = vm_dom[domain].vmd_kernel_arena;
+ }
+#endif
+ vmem_free(arena, addr, size);
+}
+
+static vmem_size_t
+vm_thread_kstack_import_quantum(void)
+{
+#ifndef __ILP32__
/*
- * Get a kernel virtual address for this thread's kstack.
+ * The kstack_quantum is larger than KVA_QUANTUM to account
+ * for holes induced by guard pages.
*/
- ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
- if (ks == 0) {
- printf("%s: kstack allocation failed\n", __func__);
- return (0);
+ return (KVA_KSTACK_QUANTUM * (kstack_pages + KSTACK_GUARD_PAGES));
+#else
+ return (KVA_KSTACK_QUANTUM);
+#endif
+}
+
+/*
+ * Import KVA from a parent arena into the kstack arena. Imports must be
+ * a multiple of kernel stack pages + guard pages in size.
+ *
+ * Kstack VA allocations need to be aligned so that the linear KVA pindex
+ * is divisible by the total number of kstack VA pages. This is necessary to
+ * make vm_kstack_pindex work properly.
+ *
+ * We import a region whose size is a multiple of KVA_KSTACK_QUANTUM from
+ * the parent arena. The size actually used by the kstack arena is one
+ * kstack smaller, to allow for the necessary alignment adjustment.
+ */
+static int
+vm_thread_kstack_arena_import(void *arena, vmem_size_t size, int flags,
+ vmem_addr_t *addrp)
+{
+ int error, rem;
+ size_t kpages = kstack_pages + KSTACK_GUARD_PAGES;
+
+ KASSERT(atop(size) % kpages == 0,
+ ("%s: Size %jd is not a multiple of kstack pages (%d)", __func__,
+ (intmax_t)size, (int)kpages));
+
+ error = vmem_xalloc(arena, vm_thread_kstack_import_quantum(),
+ KVA_KSTACK_QUANTUM, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags,
+ addrp);
+ if (error) {
+ return (error);
}
- if (KSTACK_GUARD_PAGES != 0) {
- pmap_qremove(ks, KSTACK_GUARD_PAGES);
- ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
+ rem = atop(*addrp - VM_MIN_KERNEL_ADDRESS) % kpages;
+ if (rem != 0) {
+ /* Bump addr to next aligned address */
+ *addrp = *addrp + (kpages - rem) * PAGE_SIZE;
}
+ return (0);
+}
+
+/*
+ * Release KVA from the kstack arena back to its parent arena. Released
+ * imports must be a multiple of kernel stack pages + guard pages in size.
+ */
+static void
+vm_thread_kstack_arena_release(void *arena, vmem_addr_t addr, vmem_size_t size)
+{
+ int rem;
+ size_t kpages __diagused = kstack_pages + KSTACK_GUARD_PAGES;
+
+ KASSERT(size % kpages == 0,
+ ("%s: Size %jd is not a multiple of kstack pages (%d)", __func__,
+ (intmax_t)size, (int)kpages));
+
+ KASSERT((addr - VM_MIN_KERNEL_ADDRESS) % kpages == 0,
+ ("%s: Address %p is not properly aligned (%p)", __func__,
+ (void *)addr, (void *)VM_MIN_KERNEL_ADDRESS));
/*
- * Allocate physical pages to back the stack.
+ * If the address is not KVA_KSTACK_QUANTUM-aligned, decrement it to
+ * account for the shift applied in vm_thread_kstack_arena_import().
*/
- vm_thread_stack_back(ds, ks, ma, pages, VM_ALLOC_NORMAL);
- for (i = 0; i < pages; i++)
- vm_page_valid(ma[i]);
- pmap_qenter(ks, ma, pages);
+ rem = addr % KVA_KSTACK_QUANTUM;
+ if (rem) {
+ KASSERT(rem <= ptoa(kpages),
+ ("%s: rem > kpages (%d), (%d)", __func__, rem,
+ (int)kpages));
+ addr -= rem;
+ }
+ vmem_xfree(arena, addr, vm_thread_kstack_import_quantum());
+}
+
+/*
+ * Create the kernel stack for a new thread.
+ */
+static vm_offset_t
+vm_thread_stack_create(struct domainset *ds, int pages)
+{
+ vm_page_t ma[KSTACK_MAX_PAGES];
+ struct vm_domainset_iter di;
+ int req = VM_ALLOC_NORMAL;
+ vm_object_t obj;
+ vm_offset_t ks;
+ int domain, i;
+
+ obj = vm_thread_kstack_size_to_obj(pages);
+ if (vm_ndomains > 1)
+ obj->domain.dr_policy = ds;
+ vm_domainset_iter_page_init(&di, obj, 0, &domain, &req);
+ do {
+ /*
+ * Get a kernel virtual address for this thread's kstack.
+ */
+ ks = vm_thread_alloc_kstack_kva(ptoa(pages + KSTACK_GUARD_PAGES),
+ domain);
+ if (ks == 0)
+ continue;
+ ks += ptoa(KSTACK_GUARD_PAGES);
- return (ks);
+ /*
+ * Allocate physical pages to back the stack.
+ */
+ if (vm_thread_stack_back(ks, ma, pages, req, domain) != 0) {
+ vm_thread_free_kstack_kva(ks - ptoa(KSTACK_GUARD_PAGES),
+ ptoa(pages + KSTACK_GUARD_PAGES), domain);
+ continue;
+ }
+ if (KSTACK_GUARD_PAGES != 0) {
+ pmap_qremove(ks - ptoa(KSTACK_GUARD_PAGES),
+ KSTACK_GUARD_PAGES);
+ }
+ for (i = 0; i < pages; i++)
+ vm_page_valid(ma[i]);
+ pmap_qenter(ks, ma, pages);
+ return (ks);
+ } while (vm_domainset_iter_page(&di, obj, &domain) == 0);
+
+ return (0);
}
-static void
+static __noinline void
vm_thread_stack_dispose(vm_offset_t ks, int pages)
{
vm_page_t m;
vm_pindex_t pindex;
- int i;
-
- pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+ int i, domain;
+ vm_object_t obj = vm_thread_kstack_size_to_obj(pages);
+ pindex = vm_kstack_pindex(ks, pages);
+ domain = vm_phys_domain(vtophys(ks));
pmap_qremove(ks, pages);
- VM_OBJECT_WLOCK(kstack_object);
+ VM_OBJECT_WLOCK(obj);
for (i = 0; i < pages; i++) {
- m = vm_page_lookup(kstack_object, pindex + i);
+ m = vm_page_lookup(obj, pindex + i);
if (m == NULL)
panic("%s: kstack already missing?", __func__);
+ KASSERT(vm_page_domain(m) == domain,
+ ("%s: page %p domain mismatch, expected %d got %d",
+ __func__, m, domain, vm_page_domain(m)));
vm_page_xbusy_claim(m);
vm_page_unwire_noq(m);
vm_page_free(m);
}
- VM_OBJECT_WUNLOCK(kstack_object);
+ VM_OBJECT_WUNLOCK(obj);
kasan_mark((void *)ks, ptoa(pages), ptoa(pages), 0);
- kva_free(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
- (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
+ vm_thread_free_kstack_kva(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
+ ptoa(pages + KSTACK_GUARD_PAGES), domain);
}
/*
@@ -348,6 +514,7 @@
vm_thread_new(struct thread *td, int pages)
{
vm_offset_t ks;
+ u_short ks_domain;
/* Bounds check */
if (pages <= 1)
@@ -369,8 +536,13 @@
pages);
if (ks == 0)
return (0);
+
+ ks_domain = vm_phys_domain(vtophys(ks));
+ KASSERT(ks_domain >= 0 && ks_domain < vm_ndomains,
+ ("%s: invalid domain for kstack %p", __func__, (void *)ks));
td->td_kstack = ks;
td->td_kstack_pages = pages;
+ td->td_kstack_domain = ks_domain;
kasan_mark((void *)ks, ptoa(pages), ptoa(pages), 0);
kmsan_mark((void *)ks, ptoa(pages), KMSAN_STATE_UNINIT);
return (1);
@@ -389,6 +561,7 @@
ks = td->td_kstack;
td->td_kstack = 0;
td->td_kstack_pages = 0;
+ td->td_kstack_domain = MAXMEMDOM;
kasan_mark((void *)ks, 0, ptoa(pages), KASAN_KSTACK_FREED);
if (pages == kstack_pages)
uma_zfree(kstack_cache, (void *)ks);
@@ -396,33 +569,78 @@
vm_thread_stack_dispose(ks, pages);
}
+/*
+ * Calculate kstack pindex.
+ *
+ * Uses a non-identity mapping if guard pages are
+ * active to avoid pindex holes in the kstack object.
+ */
+vm_pindex_t
+vm_kstack_pindex(vm_offset_t ks, int kpages)
+{
+ vm_pindex_t pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+
+#ifdef __ILP32__
+ return (pindex);
+#else
+ /*
+ * Return the linear pindex if guard pages aren't active or if we are
+ * allocating a non-standard kstack size.
+ */
+ if (KSTACK_GUARD_PAGES == 0 || kpages != kstack_pages) {
+ return (pindex);
+ }
+ KASSERT(pindex % (kpages + KSTACK_GUARD_PAGES) >= KSTACK_GUARD_PAGES,
+ ("%s: Attempting to calculate kstack guard page pindex", __func__));
+
+ return (pindex -
+ (pindex / (kpages + KSTACK_GUARD_PAGES) + 1) * KSTACK_GUARD_PAGES);
+#endif
+}
+
/*
* Allocate physical pages, following the specified NUMA policy, to back a
* kernel stack.
*/
-void
-vm_thread_stack_back(struct domainset *ds, vm_offset_t ks, vm_page_t ma[],
- int npages, int req_class)
+int
+vm_thread_stack_back(vm_offset_t ks, vm_page_t ma[], int npages, int req_class,
+ int domain)
{
+ vm_object_t obj = vm_thread_kstack_size_to_obj(npages);
vm_pindex_t pindex;
+ vm_page_t m;
int n;
- pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
+ pindex = vm_kstack_pindex(ks, npages);
- VM_OBJECT_WLOCK(kstack_object);
+ VM_OBJECT_WLOCK(obj);
for (n = 0; n < npages;) {
- if (vm_ndomains > 1)
- kstack_object->domain.dr_policy = ds;
-
- /*
- * Use WAITFAIL to force a reset of the domain selection policy
- * if we had to sleep for pages.
- */
- n += vm_page_grab_pages(kstack_object, pindex + n,
- req_class | VM_ALLOC_WIRED | VM_ALLOC_WAITFAIL,
- &ma[n], npages - n);
+ m = vm_page_grab(obj, pindex + n,
+ VM_ALLOC_NOCREAT | VM_ALLOC_WIRED);
+ if (m == NULL) {
+ m = vm_page_alloc_domain(obj, pindex + n, domain,
+ req_class | VM_ALLOC_WIRED);
+ }
+ if (m == NULL)
+ break;
+ ma[n++] = m;
}
- VM_OBJECT_WUNLOCK(kstack_object);
+ if (n < npages)
+ goto cleanup;
+ VM_OBJECT_WUNLOCK(obj);
+
+ return (0);
+cleanup:
+ vm_object_page_remove(obj, pindex, pindex + n, 0);
+ VM_OBJECT_WUNLOCK(obj);
+
+ return (ENOMEM);
+}
+
+vm_object_t
+vm_thread_kstack_size_to_obj(int npages)
+{
+ return (npages == kstack_pages ? kstack_object : kstack_alt_object);
}
static int
@@ -459,6 +677,9 @@
static void
kstack_cache_init(void *null)
{
+ vm_size_t kstack_quantum;
+ int domain;
+
kstack_object = vm_object_allocate(OBJT_SWAP,
atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS));
kstack_cache = uma_zcache_create("kstack_cache",
@@ -467,6 +688,31 @@
UMA_ZONE_FIRSTTOUCH);
kstack_cache_size = imax(128, mp_ncpus * 4);
uma_zone_set_maxcache(kstack_cache, kstack_cache_size);
+
+ kstack_alt_object = vm_object_allocate(OBJT_SWAP,
+ atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS));
+
+ kstack_quantum = vm_thread_kstack_import_quantum();
+ /*
+ * Reduce size used by the kstack arena to allow for
+ * alignment adjustments in vm_thread_kstack_arena_import.
+ */
+ kstack_quantum -= (kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE;
+ /*
+ * Create the kstack_arena for each domain and set kernel_arena as
+ * parent.
+ */
+ for (domain = 0; domain < vm_ndomains; domain++) {
+ vmd_kstack_arena[domain] = vmem_create("kstack arena", 0, 0,
+ PAGE_SIZE, 0, M_WAITOK);
+ KASSERT(vmd_kstack_arena[domain] != NULL,
+ ("%s: failed to create domain %d kstack_arena", __func__,
+ domain));
+ vmem_set_import(vmd_kstack_arena[domain],
+ vm_thread_kstack_arena_import,
+ vm_thread_kstack_arena_release,
+ vm_dom[domain].vmd_kernel_arena, kstack_quantum);
+ }
}
SYSINIT(vm_kstacks, SI_SUB_KMEM, SI_ORDER_ANY, kstack_cache_init, NULL);
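
The import/release pair wired up by kstack_cache_init() above hinges on an
alignment fixup: vm_thread_kstack_arena_import() takes a
KVA_KSTACK_QUANTUM-aligned chunk from the parent arena and bumps its start to
the next multiple of the stack footprint, and vm_thread_kstack_arena_release()
subtracts that same remainder before handing the chunk back. A standalone
round-trip sketch with toy constants (not kernel code; VM_MIN_KERNEL_ADDRESS
treated as zero, 2 MB quantum as on amd64 with 2 MB reservations):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define KPAGES		5UL			/* 4 stack pages + 1 guard */
#define QUANTUM		(512UL * PAGE_SIZE)	/* 2 MB */

/* Mimics vm_thread_kstack_arena_import(): align start to a kstack multiple. */
static unsigned long
import_fixup(unsigned long addr)
{
	unsigned long rem = (addr / PAGE_SIZE) % KPAGES;

	if (rem != 0)
		addr += (KPAGES - rem) * PAGE_SIZE;
	return (addr);
}

/* Mimics vm_thread_kstack_arena_release(): undo the import-time bump. */
static unsigned long
release_fixup(unsigned long addr)
{
	unsigned long rem = addr % QUANTUM;

	if (rem != 0) {
		assert(rem <= KPAGES * PAGE_SIZE);
		addr -= rem;
	}
	return (addr);
}

int
main(void)
{
	unsigned long parent, fixed;

	parent = 3UL * QUANTUM;		/* quantum-aligned parent allocation */
	fixed = import_fixup(parent);
	printf("parent %#lx -> kstack arena start %#lx\n", parent, fixed);
	assert((fixed / PAGE_SIZE) % KPAGES == 0);
	assert(release_fixup(fixed) == parent);	/* the fixups round-trip */
	return (0);
}

The bump is always smaller than one stack footprint, which is why
kstack_cache_init() shaves one kstack's worth of KVA off the quantum it
advertises to vmem_set_import(). With the assumed amd64 values
(VM_LEVEL_0_ORDER = 9, PAGE_SHIFT = 12), KVA_KSTACK_QUANTUM_SHIFT = 21, so
KVA_KSTACK_QUANTUM is 2 MB and vm_thread_kstack_import_quantum() pulls
2 MB * (4 + 1) = 10 MB from the parent arena per import.
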
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -76,6 +76,4 @@
extern u_long vm_kmem_size;
extern u_int exec_map_entries;
extern u_int exec_map_entry_size;
-extern vm_object_t kstack_object;
-
#endif /* _VM_VM_KERN_H_ */
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -530,22 +530,24 @@
vm_offset_t kaddr;
vm_pindex_t pindex;
int i, pages;
+ vm_object_t obj;
cpu_thread_swapout(td);
kaddr = td->td_kstack;
pages = td->td_kstack_pages;
- pindex = atop(kaddr - VM_MIN_KERNEL_ADDRESS);
+ obj = vm_thread_kstack_size_to_obj(pages);
+ pindex = vm_kstack_pindex(kaddr, pages);
pmap_qremove(kaddr, pages);
- VM_OBJECT_WLOCK(kstack_object);
+ VM_OBJECT_WLOCK(obj);
for (i = 0; i < pages; i++) {
- m = vm_page_lookup(kstack_object, pindex + i);
+ m = vm_page_lookup(obj, pindex + i);
if (m == NULL)
panic("vm_thread_swapout: kstack already missing?");
vm_page_dirty(m);
vm_page_xunbusy_unchecked(m);
vm_page_unwire(m, PQ_LAUNDRY);
}
- VM_OBJECT_WUNLOCK(kstack_object);
+ VM_OBJECT_WUNLOCK(obj);
}
/*
@@ -556,31 +558,34 @@
{
vm_page_t ma[KSTACK_MAX_PAGES];
vm_offset_t kaddr;
+ vm_object_t obj;
int a, count, i, j, pages, rv __diagused;
kaddr = td->td_kstack;
pages = td->td_kstack_pages;
- vm_thread_stack_back(td->td_domain.dr_policy, kaddr, ma, pages,
- oom_alloc);
+ obj = vm_thread_kstack_size_to_obj(pages);
+ while (vm_thread_stack_back(kaddr, ma, pages, oom_alloc,
+ td->td_kstack_domain) == ENOMEM)
+ ;
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if (vm_page_all_valid(ma[i])) {
i++;
continue;
}
- vm_object_pip_add(kstack_object, 1);
+ vm_object_pip_add(obj, 1);
for (j = i + 1; j < pages; j++)
if (vm_page_all_valid(ma[j]))
break;
- VM_OBJECT_WLOCK(kstack_object);
- rv = vm_pager_has_page(kstack_object, ma[i]->pindex, NULL, &a);
- VM_OBJECT_WUNLOCK(kstack_object);
+ VM_OBJECT_WLOCK(obj);
+ rv = vm_pager_has_page(obj, ma[i]->pindex, NULL, &a);
+ VM_OBJECT_WUNLOCK(obj);
KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
count = min(a + 1, j - i);
- rv = vm_pager_get_pages(kstack_object, ma + i, count, NULL, NULL);
+ rv = vm_pager_get_pages(obj, ma + i, count, NULL, NULL);
KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d",
__func__, td->td_proc->p_pid));
- vm_object_pip_wakeup(kstack_object);
+ vm_object_pip_wakeup(obj);
i += count;
}
pmap_qenter(kaddr, ma, pages);