D14633.diff
Index: gnu/usr.bin/gdb/kgdb/trgt_i386.c
===================================================================
--- gnu/usr.bin/gdb/kgdb/trgt_i386.c
+++ gnu/usr.bin/gdb/kgdb/trgt_i386.c
@@ -29,6 +29,8 @@
#include <sys/param.h>
#include <sys/proc.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
#include <machine/pcb.h>
#include <machine/frame.h>
#include <machine/segments.h>
@@ -279,12 +281,26 @@
char buf[MAX_REGISTER_SIZE];
struct kgdb_frame_cache *cache;
char *pname;
+ CORE_ADDR pcx;
+ uintptr_t addr, setidt_disp;
cache = *this_cache;
if (cache == NULL) {
cache = FRAME_OBSTACK_ZALLOC(struct kgdb_frame_cache);
*this_cache = cache;
- cache->pc = frame_func_unwind(next_frame);
+ pcx = frame_pc_unwind(next_frame);
+ if (pcx >= PMAP_TRM_MIN_ADDRESS) {
+ addr = kgdb_lookup("setidt_disp");
+ if (addr != 0) {
+ if (kvm_read(kvm, addr, &setidt_disp,
+ sizeof(setidt_disp)) !=
+ sizeof(setidt_disp))
+ warnx("kvm_read: %s", kvm_geterr(kvm));
+ else
+ pcx -= setidt_disp;
+ }
+ }
+ cache->pc = pcx;
find_pc_partial_function(cache->pc, &pname, NULL, NULL);
if (pname[0] != 'X')
cache->frame_type = FT_NORMAL;
@@ -373,6 +389,8 @@
CORE_ADDR pc;
pc = frame_pc_unwind(next_frame);
+ if (pc >= PMAP_TRM_MIN_ADDRESS)
+ return (&kgdb_trgt_trapframe_unwind);
pname = NULL;
find_pc_partial_function(pc, &pname, NULL, NULL);
if (pname == NULL)
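
The hunk above teaches kgdb's i386 unwinder that trap handlers now execute from a relocated trampoline copy: any pc at or above PMAP_TRM_MIN_ADDRESS is rebased by the setidt_disp displacement (fetched from the core with kvm_read()) before symbol lookup. A minimal sketch of that rebasing step, with the address-space constant as a placeholder rather than the real <machine/pmap.h> value:

    #include <stdint.h>

    /* Placeholder value; the real one comes from <machine/pmap.h>. */
    #define PMAP_TRM_MIN_ADDRESS 0xff000000u

    /*
     * Map a pc sampled inside the relocated trampoline back into the
     * range the code was linked at, so that symbol lookup (the
     * find_pc_partial_function() call above) can resolve it.
     */
    static uintptr_t
    rebase_trampoline_pc(uintptr_t pc, uintptr_t setidt_disp)
    {
        if (pc >= PMAP_TRM_MIN_ADDRESS)
            pc -= setidt_disp;
        return (pc);
    }
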
Index: sys/conf/files.i386
===================================================================
--- sys/conf/files.i386
+++ sys/conf/files.i386
@@ -483,6 +483,7 @@
i386/i386/bios.c standard
i386/i386/bioscall.s standard
i386/i386/bpf_jit_machdep.c optional bpf_jitter
+i386/i386/copyout.c standard
i386/i386/db_disasm.c optional ddb
i386/i386/db_interface.c optional ddb
i386/i386/db_trace.c optional ddb
Index: sys/conf/ldscript.i386
===================================================================
--- sys/conf/ldscript.i386
+++ sys/conf/ldscript.i386
@@ -6,7 +6,7 @@
SECTIONS
{
/* Read-only sections, merged into text segment: */
- . = kernbase + kernload + SIZEOF_HEADERS;
+ . = kernbase + SIZEOF_HEADERS;
.interp : { *(.interp) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
Index: sys/dev/dcons/dcons_crom.c
===================================================================
--- sys/dev/dcons/dcons_crom.c
+++ sys/dev/dcons/dcons_crom.c
@@ -109,7 +109,11 @@
static off_t idt_paddr;
/* XXX */
+#ifdef __amd64__
idt_paddr = (char *)idt - (char *)KERNBASE;
+#else /* __i386__ */
+ idt_paddr = (off_t)pmap_kextract((vm_offset_t)idt);
+#endif
crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_HI, ADDR_HI(idt_paddr));
crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_LO, ADDR_LO(idt_paddr));
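
With the i386 kernel no longer sitting at a fixed offset from KERNBASE, the IDT's physical address must be found by an actual page-table lookup, which is what pmap_kextract() performs; amd64 keeps the constant-offset arithmetic. Conceptually, pmap_kextract() leans on the recursive PTmap (the linear window onto all PTEs that locore.s describes below). A simplified, hedged sketch of that lookup for 4 KB pages only (the real function also handles 2/4 MB superpages):

    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  ((1u << PAGE_SHIFT) - 1)
    #define PG_V       0x001u
    #define PG_FRAME   0xfffff000u

    /*
     * Sketch of a non-PAE kextract: index the linear PTE window by
     * virtual page number, then splice the page offset back in.
     * Returns 0 for an unmapped address, like pmap_kextract().
     */
    static uint32_t
    kextract_sketch(const uint32_t *PTmap, uint32_t va)
    {
        uint32_t pte = PTmap[va >> PAGE_SHIFT];

        if ((pte & PG_V) == 0)
            return (0);
        return ((pte & PG_FRAME) | (va & PAGE_MASK));
    }
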
Index: sys/dev/dcons/dcons_os.c
===================================================================
--- sys/dev/dcons/dcons_os.c
+++ sys/dev/dcons/dcons_os.c
@@ -309,11 +309,16 @@
* Allow read/write access to dcons buffer.
*/
for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE)
- *vtopte(KERNBASE + pa) |= PG_RW;
+ *vtopte(PMAP_MAP_LOW + pa) |= PG_RW;
invltlb();
#endif
/* XXX P to V */
+#ifdef __amd64__
dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr);
+#else /* __i386__ */
+ dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW +
+ addr);
+#endif
dg.size = size;
if (dcons_load_buffer(dg.buf, dg.size, sc) < 0)
dg.buf = NULL;
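
The converse direction shows up here and again in the ppc(4) and syscons(4) hunks below: low physical memory is no longer visible at KERNBASE + pa on i386, so code that peeks at legacy buffers must go through the PMAP_MAP_LOW window instead. A hedged sketch of the pattern, where LOWPTOV is a hypothetical helper name, not a kernel macro:

    /*
     * Hypothetical helper capturing the #ifdef pattern above: turn a
     * low physical address into a kernel virtual one. KERNBASE and
     * PMAP_MAP_LOW are the kernel's own constants.
     */
    #ifdef __amd64__
    #define LOWPTOV(pa) ((void *)(uintptr_t)(KERNBASE + (pa)))
    #else /* __i386__ */
    #define LOWPTOV(pa) ((void *)(uintptr_t)(PMAP_MAP_LOW + (pa)))
    #endif
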
Index: sys/dev/hyperv/vmbus/i386/vmbus_vector.S
===================================================================
--- sys/dev/hyperv/vmbus/i386/vmbus_vector.S
+++ sys/dev/hyperv/vmbus/i386/vmbus_vector.S
@@ -26,11 +26,12 @@
* $FreeBSD$
*/
+#include "assym.inc"
+
+#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/specialreg.h>
-#include "assym.inc"
-
/*
* This is the Hyper-V vmbus channel direct callback interrupt.
* Only used when it is running on Hyper-V.
@@ -42,6 +43,7 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call vmbus_handle_intr
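
KENTER is the new entry-path macro from the reworked <machine/asmacros.h> (which is why the include order changes and <machine/psl.h> appears): after the segment registers are set up, it checks whether the trap arrived from user or vm86 mode, and if so switches %cr3 from the user page tables to the kernel's and moves the trapframe from the per-CPU trampoline stack onto the thread's kernel stack. A C-flavored approximation, with every helper name a descriptive stand-in for a few instructions:

    /*
     * Approximate semantics of KENTER; not the real macro, which is
     * written in assembly in <machine/asmacros.h>.
     */
    void
    kenter_sketch(struct trapframe *tf)
    {
        struct trapframe *ktf;

        if (!frame_is_from_user_or_vm86(tf))
            return;                 /* already on kernel stack and cr3 */
        load_cr3(kernel_page_tables());
        ktf = (struct trapframe *)thread_kernel_stack_top() - 1;
        *ktf = *tf;                 /* copy frame off trampoline stack */
        switch_stack_to(ktf);
    }
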
Index: sys/dev/ppc/ppc.c
===================================================================
--- sys/dev/ppc/ppc.c
+++ sys/dev/ppc/ppc.c
@@ -51,6 +51,7 @@
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/vmparam.h>
+#include <machine/pc/bios.h>
#endif
#include <dev/ppbus/ppbconf.h>
@@ -121,7 +122,7 @@
* BIOS printer list - used by BIOS probe.
*/
#define BIOS_PPC_PORTS 0x408
-#define BIOS_PORTS (short *)(KERNBASE+BIOS_PPC_PORTS)
+#define BIOS_PORTS ((short *)BIOS_PADDRTOVADDR(BIOS_PPC_PORTS))
#define BIOS_MAX_PPC 4
#endif
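
The BIOS Data Area keeps its parallel-port I/O addresses at physical 0x408, so the probe has to translate that physical address through whatever window the platform provides; BIOS_PADDRTOVADDR from <machine/pc/bios.h> (hence the added include) replaces the old hard-coded KERNBASE arithmetic. A short fragment sketching the consumer side, assuming the macros above:

    /*
     * Read the BIOS Data Area port table, as the BIOS probe does.
     * An entry of 0 conventionally means "no port present".
     */
    short ports[BIOS_MAX_PPC];
    const short *bda = BIOS_PORTS;  /* == BIOS_PADDRTOVADDR(0x408) */
    for (int i = 0; i < BIOS_MAX_PPC; i++)
        ports[i] = bda[i];
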
Index: sys/dev/syscons/syscons.c
===================================================================
--- sys/dev/syscons/syscons.c
+++ sys/dev/syscons/syscons.c
@@ -288,7 +288,11 @@
* This is enough for ec_putc() to work very early on x86
* if the kernel starts in normal color text mode.
*/
+#ifdef __amd64__
fb = KERNBASE + 0xb8000;
+#else /* __i386__ */
+ fb = PMAP_MAP_LOW + 0xb8000;
+#endif
xsize = 80;
ysize = 25;
#endif
Index: sys/i386/conf/NOTES
===================================================================
--- sys/i386/conf/NOTES
+++ sys/i386/conf/NOTES
@@ -894,19 +894,6 @@
#
options PMAP_SHPGPERPROC=201
-#
-# Change the size of the kernel virtual address space. Due to
-# constraints in loader(8) on i386, this must be a multiple of 4.
-# 256 = 1 GB of kernel address space. Increasing this also causes
-# a reduction of the address space in user processes. 512 splits
-# the 4GB cpu address space in half (2GB user, 2GB kernel). For PAE
-# kernels, the value will need to be double non-PAE. A value of 1024
-# for PAE kernels is necessary to split the address space in half.
-# This will likely need to be increased to handle memory sizes >4GB.
-# PAE kernels default to a value of 512.
-#
-options KVA_PAGES=260
-
#
# Number of initial kernel page table pages used for early bootstrap.
# This number should include enough pages to map the kernel, any
@@ -951,22 +938,6 @@
#####################################################################
# VM OPTIONS
-# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the
-# kernel to use 4 MByte pages to map the kernel instead of 4k pages.
-# This saves on the amount of memory needed for page tables needed to
-# map the kernel. You should only disable this feature as a temporary
-# workaround if you are having problems with it enabled.
-#
-#options DISABLE_PSE
-
-# Disable the global pages PGE CPU feature. The PGE feature allows pages
-# to be marked with the PG_G bit. TLB entries for these pages are not
-# flushed from the cache when %cr3 is reloaded. This can make context
-# switches less expensive. You should only disable this feature as a
-# temporary workaround if you are having problems with it enabled.
-#
-#options DISABLE_PG_G
-
# KSTACK_PAGES is the number of memory pages to assign to the kernel
# stack of each thread.
Index: sys/i386/i386/apic_vector.s
===================================================================
--- sys/i386/i386/apic_vector.s
+++ sys/i386/i386/apic_vector.s
@@ -39,6 +39,7 @@
#include "opt_smp.h"
#include <machine/asmacros.h>
+#include <machine/psl.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>
@@ -67,34 +68,39 @@
* translates that into a vector, and passes the vector to the
* lapic_handle_intr() function.
*/
-#define ISR_VEC(index, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name ## _pti) ; \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- SET_KERNEL_SREGS ; \
- cld ; \
- FAKE_MCOUNT(TF_EIP(%esp)) ; \
- cmpl $0,x2apic_mode ; \
- je 1f ; \
- movl $(MSR_APIC_ISR0 + index),%ecx ; \
- rdmsr ; \
- jmp 2f ; \
-1: ; \
- movl lapic_map, %edx ;/* pointer to local APIC */ \
- movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \
-2: ; \
- bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
- jz 3f ; \
- addl $(32 * index),%eax ; \
- pushl %esp ; \
- pushl %eax ; /* pass the IRQ */ \
- call lapic_handle_intr ; \
- addl $8, %esp ; /* discard parameter */ \
-3: ; \
- MEXITCOUNT ; \
+ .macro ISR_VEC index, vec_name
+ .text
+ SUPERALIGN_TEXT
+ .globl X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+ KENTER
+ FAKE_MCOUNT(TF_EIP(%esp))
+ cmpl $0,x2apic_mode
+ je 2f
+ movl $(MSR_APIC_ISR0 + \index),%ecx
+ rdmsr
+ jmp 3f
+2:
+ movl lapic_map, %edx /* pointer to local APIC */
+ movl LA_ISR + 16 * \index(%edx), %eax /* load ISR */
+3:
+ bsrl %eax, %eax /* index of highest set bit in ISR */
+ jz 4f
+ addl $(32 * \index),%eax
+ pushl %esp
+ pushl %eax /* pass the IRQ */
+ movl $lapic_handle_intr, %eax
+ call *%eax
+ addl $8, %esp /* discard parameter */
+4:
+ MEXITCOUNT
jmp doreti
+ .endm
/*
* Handle "spurious INTerrupts".
@@ -111,13 +117,13 @@
iret
- ISR_VEC(1, apic_isr1)
- ISR_VEC(2, apic_isr2)
- ISR_VEC(3, apic_isr3)
- ISR_VEC(4, apic_isr4)
- ISR_VEC(5, apic_isr5)
- ISR_VEC(6, apic_isr6)
- ISR_VEC(7, apic_isr7)
+ ISR_VEC 1, apic_isr1
+ ISR_VEC 2, apic_isr2
+ ISR_VEC 3, apic_isr3
+ ISR_VEC 4, apic_isr4
+ ISR_VEC 5, apic_isr5
+ ISR_VEC 6, apic_isr6
+ ISR_VEC 7, apic_isr7
/*
* Local APIC periodic timer handler.
@@ -129,9 +135,11 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
- call lapic_handle_timer
+ movl $lapic_handle_timer, %eax
+ call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -146,8 +154,10 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
- call lapic_handle_cmc
+ movl $lapic_handle_cmc, %eax
+ call *%eax
MEXITCOUNT
jmp doreti
@@ -161,8 +171,10 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
- call lapic_handle_error
+ movl $lapic_handle_error, %eax
+ call *%eax
MEXITCOUNT
jmp doreti
@@ -177,9 +189,11 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
- call xen_intr_handle_upcall
+ movl $xen_intr_handle_upcall, %eax
+ call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -200,9 +214,9 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
- call invltlb_handler
-
+ KENTER
+ movl $invltlb_handler, %eax
+ call *%eax
jmp invltlb_ret
/*
@@ -214,9 +228,9 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
- call invlpg_handler
-
+ KENTER
+ movl $invlpg_handler, %eax
+ call *%eax
jmp invltlb_ret
/*
@@ -228,9 +242,9 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
- call invlrng_handler
-
+ KENTER
+ movl $invlrng_handler, %eax
+ call *%eax
jmp invltlb_ret
/*
@@ -242,9 +256,9 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
- call invlcache_handler
-
+ KENTER
+ movl $invlcache_handler, %eax
+ call *%eax
jmp invltlb_ret
/*
@@ -256,12 +270,11 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
+ KENTER
call as_lapic_eoi
-
FAKE_MCOUNT(TF_EIP(%esp))
-
- call ipi_bitmap_handler
+ movl $ipi_bitmap_handler, %eax
+ call *%eax
MEXITCOUNT
jmp doreti
@@ -274,9 +287,10 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
+ KENTER
call as_lapic_eoi
- call cpustop_handler
+ movl $cpustop_handler, %eax
+ call *%eax
jmp doreti
/*
@@ -288,9 +302,10 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
+ KENTER
call as_lapic_eoi
- call cpususpend_handler
+ movl $cpususpend_handler, %eax
+ call *%eax
jmp doreti
/*
@@ -304,14 +319,14 @@
PUSH_FRAME
SET_KERNEL_SREGS
cld
-
+ KENTER
#ifdef COUNT_IPIS
movl PCPU(CPUID), %eax
movl ipi_rendezvous_counts(,%eax,4), %eax
incl (%eax)
#endif
- call smp_rendezvous_action
-
+ movl $smp_rendezvous_action, %eax
+ call *%eax
call as_lapic_eoi
jmp doreti
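
Two mechanical transformations run through these stubs. First, the C-preprocessor string macros become GAS .macro definitions, which assemble cleanly now that the stubs live in the copied trampoline section. Second, every direct call becomes movl $func, %eax; call *%eax: a direct near call encodes a pc-relative displacement that would point at the wrong target once the stub is copied to the trampoline area, whereas an absolute address in a register survives the move. The C analogue of the same idea is how the new copyout.c pre-computes pointers into the trampoline copy; a hedged sketch:

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Sketch of the copyout_init_tramp() pattern: the trampoline copy
     * of a routine lives at (linked address + setidt_disp), so callers
     * build an absolute function pointer once and always call through
     * it. Names mirror the diff; this is not a standalone program.
     */
    extern int copyin_fast(const void *, void *, size_t, unsigned);
    extern uintptr_t setidt_disp;

    static int (*copyin_fast_tramp)(const void *, void *, size_t,
        unsigned);

    static void
    init_tramp_sketch(void)
    {
        copyin_fast_tramp = (int (*)(const void *, void *, size_t,
            unsigned))((uintptr_t)copyin_fast + setidt_disp);
    }
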
Index: sys/i386/i386/atpic_vector.s
===================================================================
--- sys/i386/i386/atpic_vector.s
+++ sys/i386/i386/atpic_vector.s
@@ -36,6 +36,7 @@
* master and slave interrupt controllers.
*/
+#include <machine/psl.h>
#include <machine/asmacros.h>
#include "assym.inc"
@@ -43,37 +44,41 @@
/*
* Macros for interrupt entry, call to handler, and exit.
*/
-#define INTR(irq_num, vec_name) \
- .text ; \
- SUPERALIGN_TEXT ; \
-IDTVEC(vec_name ##_pti) ; \
-IDTVEC(vec_name) ; \
- PUSH_FRAME ; \
- SET_KERNEL_SREGS ; \
- cld ; \
-; \
- FAKE_MCOUNT(TF_EIP(%esp)) ; \
- pushl %esp ; \
- pushl $irq_num; /* pass the IRQ */ \
- call atpic_handle_intr ; \
- addl $8, %esp ; /* discard the parameters */ \
-; \
- MEXITCOUNT ; \
+ .macro INTR irq_num, vec_name
+ .text
+ SUPERALIGN_TEXT
+ .globl X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+ KENTER
+ FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
+ pushl $\irq_num /* pass the IRQ */
+ movl $atpic_handle_intr, %eax
+ call *%eax
+ addl $8, %esp /* discard the parameters */
+
+ MEXITCOUNT
jmp doreti
+ .endm
- INTR(0, atpic_intr0)
- INTR(1, atpic_intr1)
- INTR(2, atpic_intr2)
- INTR(3, atpic_intr3)
- INTR(4, atpic_intr4)
- INTR(5, atpic_intr5)
- INTR(6, atpic_intr6)
- INTR(7, atpic_intr7)
- INTR(8, atpic_intr8)
- INTR(9, atpic_intr9)
- INTR(10, atpic_intr10)
- INTR(11, atpic_intr11)
- INTR(12, atpic_intr12)
- INTR(13, atpic_intr13)
- INTR(14, atpic_intr14)
- INTR(15, atpic_intr15)
+ INTR 0, atpic_intr0
+ INTR 1, atpic_intr1
+ INTR 2, atpic_intr2
+ INTR 3, atpic_intr3
+ INTR 4, atpic_intr4
+ INTR 5, atpic_intr5
+ INTR 6, atpic_intr6
+ INTR 7, atpic_intr7
+ INTR 8, atpic_intr8
+ INTR 9, atpic_intr9
+ INTR 10, atpic_intr10
+ INTR 11, atpic_intr11
+ INTR 12, atpic_intr12
+ INTR 13, atpic_intr13
+ INTR 14, atpic_intr14
+ INTR 15, atpic_intr15
Index: sys/i386/i386/bios.c
===================================================================
--- sys/i386/i386/bios.c
+++ sys/i386/i386/bios.c
@@ -305,6 +305,7 @@
}
extern int vm86pa;
+extern u_long vm86phystk;
extern void bios16_jmp(void);
/*
@@ -329,7 +330,7 @@
int flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
u_int i, arg_start, arg_end;
pt_entry_t *pte;
- pd_entry_t *ptd;
+ pd_entry_t *ptd, orig_ptd;
arg_start = 0xffffffff;
arg_end = 0;
@@ -390,27 +391,14 @@
args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
args->seg.code32.limit = 0xffff;
- ptd = (pd_entry_t *)rcr3();
-#if defined(PAE) || defined(PAE_TABLES)
- if (ptd == IdlePDPT)
-#else
- if (ptd == IdlePTD)
-#endif
- {
- /*
- * no page table, so create one and install it.
- */
- pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
- ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
- *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
- *ptd = vtophys(pte) | PG_RW | PG_V;
- } else {
- /*
- * this is a user-level page table
- */
- pte = PTmap;
- *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
- }
+ /*
+ * no page table, so create one and install it.
+ */
+ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
+ ptd = IdlePTD;
+ *pte = vm86phystk | PG_RW | PG_V;
+ orig_ptd = *ptd;
+ *ptd = vtophys(pte) | PG_RW | PG_V;
pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */
stack_top = stack;
@@ -464,20 +452,12 @@
i = bios16_call(&args->r, stack_top);
- if (pte == PTmap) {
- *pte = 0; /* remove entry */
- /*
- * XXX only needs to be invlpg(0) but that doesn't work on the 386
- */
- pmap_invalidate_all(kernel_pmap);
- } else {
- *ptd = 0; /* remove page table */
- /*
- * XXX only needs to be invlpg(0) but that doesn't work on the 386
- */
- pmap_invalidate_all(kernel_pmap);
- free(pte, M_TEMP); /* ... and free it */
- }
+ *ptd = orig_ptd; /* remove page table */
+ /*
+ * XXX only needs to be invlpg(0) but that doesn't work on the 386
+ */
+ pmap_invalidate_all(kernel_pmap);
+ free(pte, M_TEMP); /* ... and free it */
return (i);
}
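
bios16() used to branch on whether the active %cr3 was the kernel's; under the 4/4 split the kernel always runs on its own page tables, so the function now unconditionally allocates a temporary page table, remembers the page-directory entry it overwrites, and restores that saved entry after the BIOS call instead of zeroing it. The shape of that dance, as a kernel-style sketch (alloc_page/free_page/flush_tlb are illustrative stand-ins; IdlePTD, vtophys() and the PG_* bits are the kernel's own):

    pt_entry_t *pte;
    pd_entry_t saved;

    pte = alloc_page();                      /* temporary page table */
    *pte = vm86phystk | PG_RW | PG_V;        /* map the vm86 stack */
    saved = *IdlePTD;                        /* remember the old PDE */
    *IdlePTD = vtophys(pte) | PG_RW | PG_V;  /* splice the table in */
    flush_tlb();
    /* ... bios16_call() runs with the low mapping in place ... */
    *IdlePTD = saved;                        /* restore the old PDE */
    flush_tlb();
    free_page(pte);
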
Index: sys/i386/i386/copyout.c
===================================================================
--- /dev/null
+++ sys/i386/i386/copyout.c
@@ -0,0 +1,489 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
+#if defined(PAE) || defined(PAE_TABLES)
+#define KCR3 ((u_int)IdlePDPT)
+#else
+#define KCR3 ((u_int)IdlePTD)
+#endif
+
+int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int);
+static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int);
+int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int);
+static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int);
+int fubyte_fast(volatile const void *base, u_int kcr3);
+static int (*fubyte_fast_tramp)(volatile const void *, u_int);
+int fuword16_fast(volatile const void *base, u_int kcr3);
+static int (*fuword16_fast_tramp)(volatile const void *, u_int);
+int fueword_fast(volatile const void *base, long *val, u_int kcr3);
+static int (*fueword_fast_tramp)(volatile const void *, long *, u_int);
+int subyte_fast(volatile void *base, int val, u_int kcr3);
+static int (*subyte_fast_tramp)(volatile void *, int, u_int);
+int suword16_fast(volatile void *base, int val, u_int kcr3);
+static int (*suword16_fast_tramp)(volatile void *, int, u_int);
+int suword_fast(volatile void *base, long val, u_int kcr3);
+static int (*suword_fast_tramp)(volatile void *, long, u_int);
+
+static int fast_copyout = 1;
+SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN,
+ &fast_copyout, 0,
+ "");
+
+void
+copyout_init_tramp(void)
+{
+
+ copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+ (uintptr_t)copyin_fast + setidt_disp);
+ copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+ (uintptr_t)copyout_fast + setidt_disp);
+ fubyte_fast_tramp = (int (*)(volatile const void *, u_int))(
+ (uintptr_t)fubyte_fast + setidt_disp);
+ fuword16_fast_tramp = (int (*)(volatile const void *, u_int))(
+ (uintptr_t)fuword16_fast + setidt_disp);
+ fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))(
+ (uintptr_t)fueword_fast + setidt_disp);
+ subyte_fast_tramp = (int (*)(volatile void *, int, u_int))(
+ (uintptr_t)subyte_fast + setidt_disp);
+ suword16_fast_tramp = (int (*)(volatile void *, int, u_int))(
+ (uintptr_t)suword16_fast + setidt_disp);
+ suword_fast_tramp = (int (*)(volatile void *, long, u_int))(
+ (uintptr_t)suword_fast + setidt_disp);
+}
+
+static int
+cp_slow0(vm_offset_t uva, size_t len, bool write,
+ void (*f)(vm_offset_t, void *), void *arg)
+{
+ struct pcpu *pc;
+ vm_page_t m[2];
+ pt_entry_t *pte;
+ vm_offset_t kaddr;
+ int error, i, plen;
+ bool sleepable;
+
+ plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE);
+ MPASS(plen <= nitems(m));
+ error = 0;
+ i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len,
+ (write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT,
+ m, nitems(m));
+ if (i != plen)
+ return (EFAULT);
+ sched_pin();
+ pc = get_pcpu();
+ if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 ||
+ (curthread->td_pflags & TDP_NOFAULTING) != 0) {
+ sleepable = false;
+ mtx_lock(&pc->pc_copyout_mlock);
+ kaddr = pc->pc_copyout_maddr;
+ } else {
+ sleepable = true;
+ sx_xlock(&pc->pc_copyout_slock);
+ kaddr = pc->pc_copyout_saddr;
+ }
+ for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) {
+ *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) |
+ pmap_cache_bits(pmap_page_get_memattr(m[i]), FALSE);
+ invlpg(kaddr + ptoa(i));
+ }
+ kaddr += uva - trunc_page(uva);
+ f(kaddr, arg);
+ sched_unpin();
+ if (sleepable)
+ sx_xunlock(&pc->pc_copyout_slock);
+ else
+ mtx_unlock(&pc->pc_copyout_mlock);
+ for (i = 0; i < plen; i++) {
+ vm_page_lock(m[i]);
+ vm_page_unhold(m[i]);
+ vm_page_unlock(m[i]);
+ }
+ return (error);
+}
+
+struct copyinstr_arg0 {
+ vm_offset_t kc;
+ size_t len;
+ size_t alen;
+ bool end;
+};
+
+static void
+copyinstr_slow0(vm_offset_t kva, void *arg)
+{
+ struct copyinstr_arg0 *ca;
+ char c;
+
+ ca = arg;
+ MPASS(ca->alen == 0 && ca->len > 0 && !ca->end);
+ while (ca->alen < ca->len && !ca->end) {
+ c = *(char *)(kva + ca->alen);
+ *(char *)ca->kc = c;
+ ca->alen++;
+ ca->kc++;
+ if (c == '\0')
+ ca->end = true;
+ }
+}
+
+int
+copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied)
+{
+ struct copyinstr_arg0 ca;
+ vm_offset_t uc;
+ size_t plen;
+ int error;
+
+ error = 0;
+ ca.end = false;
+ for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+ plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) {
+ ca.len = round_page(uc) - uc;
+ if (ca.len == 0)
+ ca.len = PAGE_SIZE;
+ if (plen + ca.len > maxlen)
+ ca.len = maxlen - plen;
+ ca.alen = 0;
+ if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) {
+ error = EFAULT;
+ break;
+ }
+ }
+ if (!ca.end && plen == maxlen && error == 0)
+ error = ENAMETOOLONG;
+ if (lencopied != NULL)
+ *lencopied = plen;
+ return (error);
+}
+
+struct copyin_arg0 {
+ vm_offset_t kc;
+ size_t len;
+};
+
+static void
+copyin_slow0(vm_offset_t kva, void *arg)
+{
+ struct copyin_arg0 *ca;
+
+ ca = arg;
+ bcopy((void *)kva, (void *)ca->kc, ca->len);
+}
+
+int
+copyin(const void *udaddr, void *kaddr, size_t len)
+{
+ struct copyin_arg0 ca;
+ vm_offset_t uc;
+ size_t plen;
+
+ if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+ (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+ return (EFAULT);
+ if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+ copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
+ return (0);
+ for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+ plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+ ca.len = round_page(uc) - uc;
+ if (ca.len == 0)
+ ca.len = PAGE_SIZE;
+ if (plen + ca.len > len)
+ ca.len = len - plen;
+ if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+static void
+copyout_slow0(vm_offset_t kva, void *arg)
+{
+ struct copyin_arg0 *ca;
+
+ ca = arg;
+ bcopy((void *)ca->kc, (void *)kva, ca->len);
+}
+
+int
+copyout(const void *kaddr, void *udaddr, size_t len)
+{
+ struct copyin_arg0 ca;
+ vm_offset_t uc;
+ size_t plen;
+
+ if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+ (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+ return (EFAULT);
+ if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+ copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0))
+ return (0);
+ for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+ plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+ ca.len = round_page(uc) - uc;
+ if (ca.len == 0)
+ ca.len = PAGE_SIZE;
+ if (plen + ca.len > len)
+ ca.len = len - plen;
+ if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+/*
+ * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
+ * memory.
+ */
+
+static void
+fubyte_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(int *)arg = *(u_char *)kva;
+}
+
+int
+fubyte(volatile const void *base)
+{
+ int res;
+
+ if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout) {
+ res = fubyte_fast_tramp(base, KCR3);
+ if (res != -1)
+ return (res);
+ }
+ if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0,
+ &res) != 0)
+ return (-1);
+ return (res);
+}
+
+static void
+fuword16_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(int *)arg = *(uint16_t *)kva;
+}
+
+int
+fuword16(volatile const void *base)
+{
+ int res;
+
+ if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout) {
+ res = fuword16_fast_tramp(base, KCR3);
+ if (res != -1)
+ return (res);
+ }
+ if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false,
+ fuword16_slow0, &res) != 0)
+ return (-1);
+ return (res);
+}
+
+static void
+fueword_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(uint32_t *)arg = *(uint32_t *)kva;
+}
+
+int
+fueword(volatile const void *base, long *val)
+{
+ uint32_t res;
+
+ if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout) {
+ if (fueword_fast_tramp(base, val, KCR3) == 0)
+ return (0);
+ }
+ if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0,
+ &res) != 0)
+ return (-1);
+ *val = res;
+ return (0);
+}
+
+int
+fueword32(volatile const void *base, int32_t *val)
+{
+
+ return (fueword(base, (long *)val));
+}
+
+/*
+ * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
+ */
+
+static void
+subyte_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(u_char *)kva = *(int *)arg;
+}
+
+int
+subyte(volatile void *base, int byte)
+{
+
+ if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout && subyte_fast_tramp(base, byte, KCR3) == 0)
+ return (0);
+ return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0,
+ &byte) != 0 ? -1 : 0);
+}
+
+static void
+suword16_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(uint16_t *)kva = *(uint16_t *)arg;
+}
+
+int
+suword16(volatile void *base, int word)
+{
+
+ if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout && suword16_fast_tramp(base, word, KCR3) == 0)
+ return (0);
+ return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true,
+ suword16_slow0, &word) != 0 ? -1 : 0);
+}
+
+static void
+suword_slow0(vm_offset_t kva, void *arg)
+{
+
+ *(int *)kva = *(uint32_t *)arg;
+}
+
+int
+suword(volatile void *base, long word)
+{
+
+ if ((uintptr_t)base + sizeof(word) < (uintptr_t)base ||
+ (uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS)
+ return (-1);
+ if (fast_copyout && suword_fast_tramp(base, word, KCR3) == 0)
+ return (0);
+ return (cp_slow0((vm_offset_t)base, sizeof(long), true,
+ suword_slow0, &word) != 0 ? -1 : 0);
+}
+
+int
+suword32(volatile void *base, int32_t word)
+{
+
+ return (suword(base, word));
+}
+
+struct casueword_arg0 {
+ uint32_t oldval;
+ uint32_t newval;
+};
+
+static void
+casueword_slow0(vm_offset_t kva, void *arg)
+{
+ struct casueword_arg0 *ca;
+
+ ca = arg;
+ atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval);
+}
+
+int
+casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
+ uint32_t newval)
+{
+ struct casueword_arg0 ca;
+ int res;
+
+ ca.oldval = oldval;
+ ca.newval = newval;
+ res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true,
+ casueword_slow0, &ca);
+ if (res == 0) {
+ *oldvalp = ca.oldval;
+ return (0);
+ }
+ return (-1);
+}
+
+int
+casueword(volatile u_long *base, u_long oldval, u_long *oldvalp, u_long newval)
+{
+ struct casueword_arg0 ca;
+ int res;
+
+ ca.oldval = oldval;
+ ca.newval = newval;
+ res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true,
+ casueword_slow0, &ca);
+ if (res == 0) {
+ *oldvalp = ca.oldval;
+ return (0);
+ }
+ return (-1);
+}
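
cp_slow0() is the heart of the slow path: it wires the user pages with vm_fault_quick_hold_pages(), maps them into a small per-CPU window (mutex-protected for non-sleepable contexts, sx-protected otherwise), runs a callback against the window, and unwires. It can hold two pages, which covers direct callers such as casueword32() whose small accesses may straddle a page boundary; copyin()/copyout() instead pre-chop requests so no chunk crosses a boundary. That chunking loop is self-contained enough to demonstrate as a runnable userland program:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define round_page(x) \
        (((uintptr_t)(x) + PAGE_SIZE - 1) & ~(uintptr_t)(PAGE_SIZE - 1))

    /*
     * The copyin()/copyout() chunking pattern: each chunk runs to the
     * next page boundary (or to 'len'), so no chunk spans two pages.
     */
    int
    main(void)
    {
        uintptr_t uva = 0x1000ff0;          /* example start address */
        size_t len = 10000, plen, clen;

        for (plen = 0; plen < len; uva += clen, plen += clen) {
            clen = round_page(uva) - uva;   /* distance to boundary */
            if (clen == 0)
                clen = PAGE_SIZE;           /* already aligned */
            if (plen + clen > len)
                clen = len - plen;          /* final partial chunk */
            printf("chunk at %#lx, %zu bytes\n",
                (unsigned long)uva, clen);
        }
        return (0);
    }
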
Index: sys/i386/i386/copyout_fast.s
===================================================================
--- /dev/null
+++ sys/i386/i386/copyout_fast.s
@@ -0,0 +1,362 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+
+#include "assym.inc"
+
+ .text
+
+ENTRY(copyout_fast)
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+
+ movl $copyout_fault,%edx
+ movl 20(%ebp),%ebx /* KCR3 */
+
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%edi
+
+ cli
+ movl PCPU(TRAMPSTK),%esi
+ movl PCPU(COPYOUT_BUF),%eax
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 12(%ebp),%eax /* udaddr */
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 16(%ebp),%eax /* len */
+ subl $4,%esi
+ movl %eax,(%esi)
+
+ subl $4, %esi
+ movl %edi, (%esi)
+
+ movl 8(%ebp),%eax /* kaddr */
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl PCPU(COPYOUT_BUF),%eax
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 16(%ebp),%eax /* len */
+ subl $4,%esi
+ movl %eax,(%esi)
+
+ movl %esp,%eax
+ movl %esi,%esp
+
+ /* bcopy(%esi = kaddr, %edi = PCPU(copyout_buf), %ecx = len) */
+ popl %ecx
+ popl %edi
+ popl %esi
+ rep; movsb
+
+ popl %edi
+ movl %edi,%cr3
+
+ /* bcopy(%esi = PCPU(copyout_buf), %edi = udaddr, %ecx = len) */
+ popl %ecx
+ popl %edi
+ popl %esi
+ rep; movsb
+
+ movl %ebx,%cr3
+ movl %eax,%esp
+ sti
+
+ xorl %eax,%eax
+ popl %ebx
+ popl %edi
+ popl %esi
+ leave
+ ret
+END(copyout_fast)
+
+ENTRY(copyin_fast)
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+
+ movl $copyout_fault,%edx
+ movl 20(%ebp),%ebx /* KCR3 */
+
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%edi
+
+ cli
+ movl PCPU(TRAMPSTK),%esi
+ movl PCPU(COPYOUT_BUF),%eax
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 12(%ebp),%eax /* kaddr */
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 16(%ebp),%eax /* len */
+ subl $4,%esi
+ movl %eax,(%esi)
+
+ movl 8(%ebp),%eax /* udaddr */
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl PCPU(COPYOUT_BUF),%eax
+ subl $4,%esi
+ movl %eax,(%esi)
+ movl 16(%ebp),%eax /* len */
+ subl $4,%esi
+ movl %eax,(%esi)
+
+ movl %esp,%eax
+ movl %esi,%esp
+ movl %edi,%cr3
+
+ /* bcopy(%esi = udaddr, %edi = PCPU(copyout_buf), %ecx = len) */
+ popl %ecx
+ popl %edi
+ popl %esi
+ rep; movsb
+
+ movl %ebx,%cr3
+
+ /* bcopy(%esi = PCPU(copyout_buf), %edi = kaddr, %ecx = len) */
+ popl %ecx
+ popl %edi
+ popl %esi
+ rep; movsb
+
+ movl %eax,%esp
+ sti
+
+ xorl %eax,%eax
+ popl %ebx
+ popl %edi
+ popl %esi
+ leave
+ ret
+END(copyin_fast)
+
+ ALIGN_TEXT
+copyout_fault:
+ movl %eax,%esp
+ sti
+ movl $EFAULT,%eax
+ popl %ebx
+ popl %edi
+ popl %esi
+ leave
+ ret
+
+ENTRY(fueword_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 8(%ebp),%ecx /* from */
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 16(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movl (%ecx),%eax
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ movl 12(%ebp),%edx
+ movl %eax,(%edx)
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(fueword_fast)
+
+ENTRY(fuword16_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 8(%ebp),%ecx /* from */
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 12(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movzwl (%ecx),%eax
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(fuword16_fast)
+
+ENTRY(fubyte_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 8(%ebp),%ecx /* from */
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 12(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movzbl (%ecx),%eax
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(fubyte_fast)
+
+ ALIGN_TEXT
+fusufault:
+ movl %esi,%esp
+ sti
+ xorl %eax,%eax
+ decl %eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+
+ENTRY(suword_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 8(%ebp),%ecx /* to */
+ movl 12(%ebp),%edi /* val */
+ movl 16(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movl %edi,(%ecx)
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(suword_fast)
+
+ENTRY(suword16_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 8(%ebp),%ecx /* to */
+ movl 12(%ebp),%edi /* val */
+ movl 16(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movw %di,(%ecx)
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(suword16_fast)
+
+ENTRY(subyte_fast)
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax),%eax
+ movl $fusufault,%edx
+ movl 8(%ebp),%ecx /* to */
+ movl 12(%ebp),%edi /* val */
+ movl 16(%ebp),%ebx
+ movl %esp,%esi
+ cli
+ movl PCPU(TRAMPSTK),%esp
+ movl %eax,%cr3
+ movl %edi,%eax
+ movb %al,(%ecx)
+ movl %ebx,%cr3
+ movl %esi,%esp
+ sti
+ xorl %eax,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ leave
+ ret
+END(subyte_fast)
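
The fast path avoids wiring pages altogether. With interrupts disabled, it moves onto the per-CPU trampoline stack (mapped in both address spaces), stages a small argument list there, and bounces through the per-CPU copyout buffer: kernel data is copied into the buffer under the kernel %cr3, then %cr3 is switched to the user page tables for the user-side copy (copyin_fast does the two copies in the opposite order). %edx is preloaded with copyout_fault/fusufault so the trampoline page-fault handler (see the IDTVEC(page) change in exception.s below) can unwind a faulting user access into an error return. C-flavored pseudocode for copyout_fast, with each helper name standing in for a few instructions:

    /*
     * Pseudocode for copyout_fast; helper names are illustrative.
     */
    int
    copyout_fast_sketch(const void *kaddr, void *udaddr, size_t len,
        uint32_t kcr3)
    {
        uint32_t ucr3 = current_pcb()->pcb_cr3;

        disable_interrupts();
        switch_to_trampoline_stack();       /* dual-mapped stack */
        memcpy(percpu_copyout_buf(), kaddr, len);
        load_cr3(ucr3);                     /* user page tables */
        memcpy(udaddr, percpu_copyout_buf(), len);  /* may fault */
        load_cr3(kcr3);                     /* back to the kernel */
        restore_stack();
        enable_interrupts();
        return (0);                         /* EFAULT via copyout_fault */
    }
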
Index: sys/i386/i386/db_interface.c
===================================================================
--- sys/i386/i386/db_interface.c
+++ sys/i386/i386/db_interface.c
@@ -115,4 +115,7 @@
db_printf("APIC ID = %d\n", pc->pc_apic_id);
db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
+ db_printf("trampstk = 0x%x\n", pc->pc_trampstk);
+ db_printf("kesp0 = 0x%x\n", pc->pc_kesp0);
+ db_printf("common_tssp = 0x%x\n", (u_int)pc->pc_common_tssp);
}
Index: sys/i386/i386/db_trace.c
===================================================================
--- sys/i386/i386/db_trace.c
+++ sys/i386/i386/db_trace.c
@@ -317,7 +317,12 @@
* actually made the call.
*/
frame_type = NORMAL;
- sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset);
+ if (eip >= PMAP_TRM_MIN_ADDRESS) {
+ sym = db_search_symbol(eip - 1 - setidt_disp, DB_STGY_ANY,
+ &offset);
+ } else {
+ sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset);
+ }
db_symbol_values(sym, &name, NULL);
if (name != NULL) {
if (strcmp(name, "calltrap") == 0 ||
@@ -357,9 +362,9 @@
* switch to a known good state.
*/
if (frame_type == DOUBLE_FAULT) {
- esp = PCPU_GET(common_tss.tss_esp);
- eip = PCPU_GET(common_tss.tss_eip);
- ebp = PCPU_GET(common_tss.tss_ebp);
+ esp = PCPU_GET(common_tssp)->tss_esp;
+ eip = PCPU_GET(common_tssp)->tss_eip;
+ ebp = PCPU_GET(common_tssp)->tss_ebp;
db_printf(
"--- trap 0x17, eip = %#r, esp = %#r, ebp = %#r ---\n",
eip, esp, ebp);
@@ -379,30 +384,41 @@
else
tf = (struct trapframe *)((int)*fp + 12);
- if (INKERNEL((int) tf)) {
- esp = get_esp(tf);
- eip = tf->tf_eip;
- ebp = tf->tf_ebp;
- switch (frame_type) {
- case TRAP:
- db_printf("--- trap %#r", tf->tf_trapno);
- break;
- case SYSCALL:
- db_printf("--- syscall");
- decode_syscall(tf->tf_eax, td);
- break;
- case TRAP_TIMERINT:
- case TRAP_INTERRUPT:
- case INTERRUPT:
- db_printf("--- interrupt");
- break;
- default:
- panic("The moon has moved again.");
- }
- db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip,
- esp, ebp);
+ esp = get_esp(tf);
+ eip = tf->tf_eip;
+ ebp = tf->tf_ebp;
+ switch (frame_type) {
+ case TRAP:
+ db_printf("--- trap %#r", tf->tf_trapno);
+ break;
+ case SYSCALL:
+ db_printf("--- syscall");
+ decode_syscall(tf->tf_eax, td);
+ break;
+ case TRAP_TIMERINT:
+ case TRAP_INTERRUPT:
+ case INTERRUPT:
+ db_printf("--- interrupt");
+ break;
+ default:
+ panic("The moon has moved again.");
}
-
+ db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp);
+
+ switch (frame_type) {
+ case TRAP:
+ case TRAP_TIMERINT:
+ case TRAP_INTERRUPT:
+ case INTERRUPT:
+ if ((tf->tf_eflags & PSL_VM) != 0 ||
+ (tf->tf_cs & SEL_RPL_MASK) != 0)
+ ebp = 0;
+ break;
+ case SYSCALL:
+ ebp = 0;
+ break;
+ }
+
*ip = (db_addr_t) eip;
*fp = (struct i386_frame *) ebp;
}
@@ -432,6 +448,10 @@
return (0);
}
+ /* 'frame' can be null initially. Just print the pc then. */
+ if (frame == NULL)
+ goto out;
+
/*
* If an indirect call via an invalid pointer caused a trap,
* %pc contains the invalid address while the return address
@@ -540,15 +560,20 @@
db_nextframe(&frame, &pc, td);
- if (INKERNEL((int)pc) && !INKERNEL((int) frame)) {
+out:
+ /*
+ * 'frame' can be null here, either because it was initially
+ * null or because db_nextframe() found no frame.
+ * db_nextframe() may also have found a non-kernel frame.
+ * !INKERNEL() classifies both. Stop tracing in either case,
+ * after printing the pc if it lies in the kernel.
+ */
+ if (frame == NULL || frame <= actframe) {
sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
db_symbol_values(sym, &name, NULL);
db_print_stack_entry(name, 0, 0, 0, pc, frame);
break;
}
- if (!INKERNEL((int) frame)) {
- break;
- }
}
return (0);
Index: sys/i386/i386/elf_machdep.c
===================================================================
--- sys/i386/i386/elf_machdep.c
+++ sys/i386/i386/elf_machdep.c
@@ -137,7 +137,6 @@
(sysinit_cfunc_t) elf32_insert_brand_entry,
&kfreebsd_brand_info);
-
void
elf32_dump_thread(struct thread *td, void *dst, size_t *off)
{
Index: sys/i386/i386/exception.s
===================================================================
--- sys/i386/i386/exception.s
+++ sys/i386/i386/exception.s
@@ -1,11 +1,13 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
- * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2007, 2018 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
+ * Portions of this software were developed by Konstantin Belousov
+ * <kib@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -38,14 +40,11 @@
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
-#include <machine/asmacros.h>
-#include <machine/psl.h>
-#include <machine/trap.h>
-
#include "assym.inc"
-#define SEL_RPL_MASK 0x0003
-#define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */
+#include <machine/psl.h>
+#include <machine/asmacros.h>
+#include <machine/trap.h>
#ifdef KDTRACE_HOOKS
.bss
@@ -63,20 +62,19 @@
.zero 8
#endif
.text
-#ifdef HWPMC_HOOKS
- ENTRY(start_exceptions)
-#endif
+ENTRY(start_exceptions)
+ .globl tramp_idleptd
+tramp_idleptd: .long 0
+
/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
/*
* Trap and fault vector routines.
*
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
+ * All traps are 'interrupt gates', SDT_SYS386IGT. Interrupts are kept
+ * disabled by hardware until the code has switched to the kernel
+ * address space and the kernel thread stack.
*
* The cpu will push a certain amount of state onto the kernel stack for
* the current process. The amount of state depends on the type of trap
@@ -92,6 +90,10 @@
* must restore them prior to calling 'iret'. The cpu adjusts the %cs and
* %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
* must load them with appropriate values for supervisor mode operation.
+ *
+ * This code is not executed at the linked address; it is copied to the
+ * trampoline area. As a consequence, all code there and in included files
+ * must be PIC.
*/
MCOUNT_LABEL(user)
@@ -103,8 +105,6 @@
pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
pushl $0; TRAP(T_TRCTRAP)
-IDTVEC(nmi)
- pushl $0; TRAP(T_NMI)
IDTVEC(bpt)
pushl $0; TRAP(T_BPTFLT)
IDTVEC(dtrace_ret)
@@ -124,15 +124,23 @@
IDTVEC(tss)
TRAP(T_TSSFLT)
IDTVEC(missing)
- TRAP(T_SEGNPFLT)
+ pushl $T_SEGNPFLT
+ jmp irettraps
IDTVEC(stk)
- TRAP(T_STKFLT)
+ pushl $T_STKFLT
+ jmp irettraps
IDTVEC(prot)
- TRAP(T_PROTFLT)
+ pushl $T_PROTFLT
+ jmp irettraps
IDTVEC(page)
- TRAP(T_PAGEFLT)
-IDTVEC(mchk)
- pushl $0; TRAP(T_MCHK)
+ cmpl $PMAP_TRM_MIN_ADDRESS, TF_EIP-TF_ERR(%esp)
+ jb 1f
+ movl %ebx, %cr3
+ movl %edx, TF_EIP-TF_ERR(%esp)
+ addl $4, %esp
+ iret
+1: pushl $T_PAGEFLT
+ jmp alltraps
IDTVEC(rsvd_pti)
IDTVEC(rsvd)
pushl $0; TRAP(T_RESERVED)
@@ -144,7 +152,8 @@
pushl $0; TRAP(T_XMMFLT)
/*
- * All traps except ones for syscalls jump to alltraps. If
+ * All traps except those for syscalls and invalid-segment faults
+ * jump to alltraps. If
* interrupts were enabled when the trap occurred, then interrupts
* are enabled now if the trap was through a trap gate, else
* disabled if the trap was through an interrupt gate. Note that
@@ -156,20 +165,16 @@
.globl alltraps
.type alltraps,@function
alltraps:
- pushal
- pushl $0
- movw %ds,(%esp)
- pushl $0
- movw %es,(%esp)
- pushl $0
- movw %fs,(%esp)
+ PUSH_FRAME2
alltraps_with_regs_pushed:
SET_KERNEL_SREGS
cld
+ KENTER
FAKE_MCOUNT(TF_EIP(%esp))
calltrap:
pushl %esp
- call trap
+ movl $trap,%eax
+ call *%eax
add $4, %esp
/*
@@ -178,28 +183,84 @@
MEXITCOUNT
jmp doreti
+ .globl irettraps
+ .type irettraps,@function
+irettraps:
+ testl $PSL_VM, TF_EFLAGS-TF_TRAPNO(%esp)
+ jnz alltraps
+ testb $SEL_RPL_MASK, TF_CS-TF_TRAPNO(%esp)
+ jnz alltraps
+
+ /*
+ * Kernel mode.
+ * The special case there is the kernel mode with user %cr3 and
+ * trampoline stack. We need to copy both current frame and the
+ * hardware portion of the frame we tried to return to, to the
+ * normal stack. This logic must follow the stack unwind order
+ * in doreti.
+ */
+ PUSH_FRAME2
+ SET_KERNEL_SREGS
+ cld
+ call 1f
+1: popl %ebx
+ leal (doreti_iret - 1b)(%ebx), %edx
+ cmpl %edx, TF_EIP(%esp)
+ jne 2f
+ movl $(2 * TF_SZ - TF_EIP), %ecx
+ jmp 6f
+2: leal (doreti_popl_ds - 1b)(%ebx), %edx
+ cmpl %edx, TF_EIP(%esp)
+ jne 3f
+ movl $(2 * TF_SZ - TF_DS), %ecx
+ jmp 6f
+3: leal (doreti_popl_es - 1b)(%ebx), %edx
+ cmpl %edx, TF_EIP(%esp)
+ jne 4f
+ movl $(2 * TF_SZ - TF_ES), %ecx
+ jmp 6f
+4: leal (doreti_popl_fs - 1b)(%ebx), %edx
+ cmpl %edx, TF_EIP(%esp)
+ jne 5f
+ movl $(2 * TF_SZ - TF_FS), %ecx
+ jmp 6f
+ /* kernel mode, normal */
+5: FAKE_MCOUNT(TF_EIP(%esp))
+ jmp calltrap
+6: cmpl $PMAP_TRM_MIN_ADDRESS, %esp /* trampoline stack ? */
+ jb 5b /* if not, no need to change stacks */
+ movl (tramp_idleptd - 1b)(%ebx), %eax
+ movl %eax, %cr3
+ movl PCPU(KESP0), %edx
+ subl %ecx, %edx
+ movl %edx, %edi
+ movl %esp, %esi
+ rep; movsb
+ movl %edx, %esp
+ FAKE_MCOUNT(TF_EIP(%esp))
+ jmp calltrap
+
/*
* Privileged instruction fault.
*/
#ifdef KDTRACE_HOOKS
SUPERALIGN_TEXT
IDTVEC(ill)
- /*
- * Check if a DTrace hook is registered. The default (data) segment
- * cannot be used for this since %ds is not known good until we
- * verify that the entry was from kernel mode.
- */
- cmpl $0,%ss:dtrace_invop_jump_addr
- je norm_ill
-
/*
* Check if this is a user fault. If so, just handle it as a normal
* trap.
*/
- cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */
- jne norm_ill
testl $PSL_VM, 8(%esp) /* and vm86 mode. */
jnz norm_ill
+ cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */
+ jne norm_ill
+
+ /*
+ * Check if a DTrace hook is registered. The trampoline cannot
+ * be instrumented.
+ */
+ cmpl $0, dtrace_invop_jump_addr
+ je norm_ill
/*
* This is a kernel instruction fault that might have been caused
@@ -221,47 +282,43 @@
* Process the instruction fault in the normal way.
*/
norm_ill:
- pushl $0
- TRAP(T_PRIVINFLT)
+ pushl $0
+ pushl $T_PRIVINFLT
+ jmp alltraps
#endif
-/*
- * Call gate entry for syscalls (lcall 7,0).
- * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
- *
- * The intersegment call has been set up to specify one dummy parameter.
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
- SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
- pushfl /* save eflags */
- popl 8(%esp) /* shuffle into tf_eflags */
- pushl $7 /* sizeof "lcall 7,0" */
- pushl $0 /* tf_trapno */
- pushal
- pushl $0
- movw %ds,(%esp)
+IDTVEC(mchk)
pushl $0
- movw %es,(%esp)
+ pushl $T_MCHK
+ jmp nmi_mchk_common
+
+IDTVEC(nmi)
pushl $0
- movw %fs,(%esp)
+ pushl $T_NMI
+nmi_mchk_common:
+ PUSH_FRAME2
SET_KERNEL_SREGS
cld
+ /*
+ * Save %cr3 into tf_err. There is no good place to put it.
+ * Always reload %cr3, since we might have interrupted the
+ * kernel entry or exit.
+ * Do not switch to the thread kernel stack, otherwise we might
+ * obliterate the previous context partially copied from the
+ * trampoline stack.
+ */
+ movl %cr3, %eax
+ movl %eax, TF_ERR(%esp)
+ call 1f
+1: popl %eax
+ movl (tramp_idleptd - 1b)(%eax), %eax
+ movl %eax, %cr3
FAKE_MCOUNT(TF_EIP(%esp))
- pushl %esp
- call syscall
- add $4, %esp
- MEXITCOUNT
- jmp doreti
+ jmp calltrap
/*
* Trap gate entry for syscalls (int 0x80).
- * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
+ * This is used by FreeBSD ELF executables, "new" a.out executables, and all
* Linux executables.
*
* Even though the name says 'int0x80', this is actually a trap gate, not an
@@ -272,18 +329,15 @@
IDTVEC(int0x80_syscall)
pushl $2 /* sizeof "int 0x80" */
pushl $0 /* tf_trapno */
- pushal
- pushl $0
- movw %ds,(%esp)
- pushl $0
- movw %es,(%esp)
- pushl $0
- movw %fs,(%esp)
+ PUSH_FRAME2
SET_KERNEL_SREGS
cld
+ MOVE_STACKS
+ sti
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
- call syscall
+ movl $syscall, %eax
+ call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -292,7 +346,8 @@
pushl %esp /* trapframe pointer */
pushl %ebx /* arg1 */
pushl %esi /* function */
- call fork_exit
+ movl $fork_exit, %eax
+ call *%eax
addl $12,%esp
/* cut from syscall */
@@ -343,6 +398,8 @@
.text
MCOUNT_LABEL(eintr)
+#include <i386/i386/copyout_fast.s>
+
/*
* void doreti(struct trapframe)
*
@@ -375,7 +432,7 @@
movl PCPU(CURPCB),%ecx
testl $PCB_VM86CALL,PCB_FLAGS(%ecx)
jz doreti_ast
- jmp doreti_exit
+ jmp doreti_popl_fs
doreti_notvm86:
testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
@@ -393,7 +450,8 @@
je doreti_exit
sti
pushl %esp /* pass a pointer to the trapframe */
- call ast
+ movl $ast, %eax
+ call *%eax
add $4,%esp
jmp doreti_ast
@@ -407,6 +465,23 @@
doreti_exit:
MEXITCOUNT
+ cmpl $T_NMI, TF_TRAPNO(%esp)
+ je doreti_iret_nmi
+ cmpl $T_MCHK, TF_TRAPNO(%esp)
+ je doreti_iret_nmi
+ testl $SEL_RPL_MASK, TF_CS(%esp)
+ jz doreti_popl_fs
+ movl %esp, %esi
+ movl PCPU(TRAMPSTK), %edx
+ movl $TF_SZ, %ecx
+ subl %ecx, %edx
+ movl %edx, %edi
+ rep; movsb
+ movl %edx, %esp
+ movl PCPU(CURPCB),%eax
+ movl PCB_CR3(%eax), %eax
+ movl %eax, %cr3
+
.globl doreti_popl_fs
doreti_popl_fs:
popl %fs
@@ -422,6 +497,11 @@
doreti_iret:
iret
+doreti_iret_nmi:
+ movl TF_ERR(%esp), %eax
+ movl %eax, %cr3
+ jmp doreti_popl_fs
+
/*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
@@ -440,7 +520,8 @@
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
- subl $8,%esp
+ pushl $0 /* tf_err */
+ pushl $0 /* tf_trapno XXXKIB: provide more useful value ? */
pushal
pushl $0
movw %ds,(%esp)
@@ -460,10 +541,10 @@
doreti_popl_fs_fault:
testb $SEL_RPL_MASK,TF_CS-TF_FS(%esp)
jz doreti_popl_fs_kfault
- sti
movl $0,TF_ERR(%esp) /* XXX should be the error code */
movl $T_PROTFLT,TF_TRAPNO(%esp)
- jmp alltraps_with_regs_pushed
+ SET_KERNEL_SREGS
+ jmp calltrap
doreti_popl_ds_kfault:
movl $0,(%esp)
@@ -474,7 +555,7 @@
doreti_popl_fs_kfault:
movl $0,(%esp)
jmp doreti_popl_fs
-
+
#ifdef HWPMC_HOOKS
doreti_nmi:
/*
@@ -482,6 +563,8 @@
* was from user mode and if so whether the current thread
* needs a user call chain capture.
*/
+ testl $PSL_VM, TF_EFLAGS(%esp)
+ jnz doreti_exit
testb $SEL_RPL_MASK,TF_CS(%esp)
jz doreti_exit
movl PCPU(CURTHREAD),%eax /* curthread present? */
@@ -489,12 +572,21 @@
jz doreti_exit
testl $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
jz doreti_exit
+ /*
+ * Switch to the thread stack. Reset tf_trapno so it no longer
+ * indicates an NMI, causing a normal userspace exit.
+ */
+ movl $T_RESERVED, TF_TRAPNO(%esp)
+ NMOVE_STACKS
/*
* Take the processor out of NMI mode by executing a fake "iret".
*/
pushfl
pushl %cs
- pushl $outofnmi
+ call 1f
+1: popl %eax
+ leal (outofnmi-1b)(%eax),%eax
+ pushl %eax
iret
outofnmi:
/*
@@ -511,5 +603,6 @@
call *%ecx
addl $12,%esp
jmp doreti_ast
- ENTRY(end_exceptions)
#endif
+
+ENTRY(end_exceptions)
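
Everything between ENTRY(start_exceptions) and ENTRY(end_exceptions), which the patch extends to cover all builds rather than only HWPMC ones, is copied at boot into the trampoline region, and tramp_idleptd is patched in that copy with the kernel page-directory address; that is why the code fetches it pc-relatively with the call 1f; popl idiom. On the way out, doreti now performs the inverse of KENTER for returns to user mode: copy the trapframe onto the trampoline stack, load the user %cr3 from the PCB, then pop the frame and iret (NMIs and machine checks instead restore the %cr3 they stashed in tf_err on entry). Roughly, in C-flavored pseudocode with illustrative helper names:

    /*
     * Pseudocode for the user-return tail of doreti.
     */
    void
    doreti_user_sketch(struct trapframe *tf)
    {
        struct trapframe *ttf;

        ttf = (struct trapframe *)percpu_trampstk() - 1;
        *ttf = *tf;                       /* frame onto trampoline stack */
        switch_stack_to(ttf);
        load_cr3(current_pcb()->pcb_cr3); /* user page tables */
        pop_frame_and_iret(ttf);          /* doreti_popl_fs ... iret */
    }
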
Index: sys/i386/i386/genassym.c
===================================================================
--- sys/i386/i386/genassym.c
+++ sys/i386/i386/genassym.c
@@ -74,6 +74,7 @@
#include <x86/apicreg.h>
#endif
#include <machine/cpu.h>
+#include <machine/pcb_ext.h>
#include <machine/pcb.h>
#include <machine/sigframe.h>
#include <machine/vm86.h>
@@ -141,6 +142,8 @@
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
+ASSYM(PCB_EXT_TSS, offsetof(struct pcb_ext, ext_tss));
+
ASSYM(PCB_FSD, offsetof(struct pcb, pcb_fsd));
ASSYM(PCB_GSD, offsetof(struct pcb, pcb_gsd));
ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
@@ -164,6 +167,7 @@
ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip));
ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
+ASSYM(TF_SZ, sizeof(struct trapframe));
ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
#ifdef COMPAT_43
@@ -206,7 +210,7 @@
ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
-ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss));
+ASSYM(PC_COMMON_TSSP, offsetof(struct pcpu, pc_common_tssp));
ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd));
ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt));
ASSYM(PC_FSGS_GDT, offsetof(struct pcpu, pc_fsgs_gdt));
@@ -214,6 +218,9 @@
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
+ASSYM(PC_KESP0, offsetof(struct pcpu, pc_kesp0));
+ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk));
+ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf));
#ifdef DEV_APIC
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
@@ -227,6 +234,10 @@
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+ASSYM(VM86_STACK_SPACE, VM86_STACK_SPACE);
+
+ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS);
+ASSYM(TRAMP_COPYOUT_SZ, TRAMP_COPYOUT_SZ);
#ifdef HWPMC_HOOKS
ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);
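
genassym.c is how the assembly sources learn C-side layout facts: the new ASSYM(TF_SZ, sizeof(struct trapframe)) entry, for example, is what lets exception.s copy whole trapframes with movl $TF_SZ, %ecx. In the real build the constants are extracted from the compiled genassym object by a script into assym.inc; a runnable sketch of the same idea, using a simplified stand-in layout:

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified stand-in for the i386 trapframe layout. */
    struct trapframe_sketch {
        int tf_fs, tf_es, tf_ds;
        int tf_edi, tf_esi, tf_ebp, tf_isp;
        int tf_ebx, tf_edx, tf_ecx, tf_eax;
        int tf_trapno, tf_err, tf_eip, tf_cs, tf_eflags;
    };

    int
    main(void)
    {
        /* Emit assembler-consumable constants, assym.inc style. */
        printf("#define TF_EIP %zu\n",
            offsetof(struct trapframe_sketch, tf_eip));
        printf("#define TF_SZ %zu\n", sizeof(struct trapframe_sketch));
        return (0);
    }
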
Index: sys/i386/i386/locore.s
===================================================================
--- sys/i386/i386/locore.s
+++ sys/i386/i386/locore.s
@@ -53,14 +53,6 @@
#include "assym.inc"
-/*
- * XXX
- *
- * Note: This version greatly munged to avoid various assembler errors
- * that may be fixed in newer versions of gas. Perhaps newer versions
- * will have more pleasant appearance.
- */
-
/*
* PTmap is recursive pagemap at top of virtual address space.
* Within PTmap, the page directory can be found (third indirection).
@@ -71,7 +63,7 @@
.set PTDpde,PTD + (PTDPTDI * PDESIZE)
/*
- * Compiled KERNBASE location and the kernel load address
+ * Compiled KERNBASE location and the kernel load address, now identical.
*/
.globl kernbase
.set kernbase,KERNBASE
@@ -90,83 +82,6 @@
.globl bootinfo
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
- .globl KERNend
-KERNend: .long 0 /* phys addr end of kernel (just after bss) */
-physfree: .long 0 /* phys addr of next free page */
-
- .globl IdlePTD
-IdlePTD: .long 0 /* phys addr of kernel PTD */
-
-#if defined(PAE) || defined(PAE_TABLES)
- .globl IdlePDPT
-IdlePDPT: .long 0 /* phys addr of kernel PDPT */
-#endif
-
- .globl KPTmap
-KPTmap: .long 0 /* address of kernel page tables */
-
- .globl KPTphys
-KPTphys: .long 0 /* phys addr of kernel page tables */
-
- .globl proc0kstack
-proc0kstack: .long 0 /* address of proc 0 kstack space */
-p0kpa: .long 0 /* phys addr of proc0's STACK */
-
-vm86phystk: .long 0 /* PA of vm86/bios stack */
-
- .globl vm86paddr, vm86pa
-vm86paddr: .long 0 /* address of vm86 region */
-vm86pa: .long 0 /* phys addr of vm86 region */
-
-/**********************************************************************
- *
- * Some handy macros
- *
- */
-
-#define R(foo) ((foo)-KERNBASE)
-
-#define ALLOCPAGES(foo) \
- movl R(physfree), %esi ; \
- movl $((foo)*PAGE_SIZE), %eax ; \
- addl %esi, %eax ; \
- movl %eax, R(physfree) ; \
- movl %esi, %edi ; \
- movl $((foo)*PAGE_SIZE),%ecx ; \
- xorl %eax,%eax ; \
- cld ; \
- rep ; \
- stosb
-
-/*
- * fillkpt
- * eax = page frame address
- * ebx = index into page table
- * ecx = how many pages to map
- * base = base address of page dir/table
- * prot = protection bits
- */
-#define fillkpt(base, prot) \
- shll $PTESHIFT,%ebx ; \
- addl base,%ebx ; \
- orl $PG_V,%eax ; \
- orl prot,%eax ; \
-1: movl %eax,(%ebx) ; \
- addl $PAGE_SIZE,%eax ; /* increment physical address */ \
- addl $PTESIZE,%ebx ; /* next pte */ \
- loop 1b
-
-/*
- * fillkptphys(prot)
- * eax = physical address
- * ecx = how many pages to map
- * prot = protection bits
- */
-#define fillkptphys(prot) \
- movl %eax, %ebx ; \
- shrl $PAGE_SHIFT, %ebx ; \
- fillkpt(R(KPTphys), prot)
-
.text
/**********************************************************************
*
@@ -179,6 +94,7 @@
movw $0x1234,0x472
/* Set up a real frame in case the double return in newboot is executed. */
+ xorl %ebp,%ebp
pushl %ebp
movl %esp, %ebp
@@ -204,8 +120,8 @@
* inactive from now until we switch to new ones, since we don't load any
* more segment registers or permit interrupts until after the switch.
*/
- movl $R(end),%ecx
- movl $R(edata),%edi
+ movl $end,%ecx
+ movl $edata,%edi
subl %edi,%ecx
xorl %eax,%eax
cld
@@ -220,48 +136,10 @@
* the old stack, but it need not be, since recover_bootinfo actually
* returns via the old frame.
*/
- movl $R(tmpstk),%esp
+ movl $tmpstk,%esp
call identify_cpu
- call create_pagetables
-
-/*
- * If the CPU has support for VME, turn it on.
- */
- testl $CPUID_VME, R(cpu_feature)
- jz 1f
- movl %cr4, %eax
- orl $CR4_VME, %eax
- movl %eax, %cr4
-1:
-
-/* Now enable paging */
-#if defined(PAE) || defined(PAE_TABLES)
- movl R(IdlePDPT), %eax
- movl %eax, %cr3
- movl %cr4, %edx
- orl $CR4_PAE, %edx
- movl %edx, %cr4
-#else
- movl R(IdlePTD), %eax
- movl %eax,%cr3 /* load ptd addr into mmu */
-#endif
- movl %cr0,%edx /* get control word */
- orl $CR0_PE|CR0_PG,%edx /* enable paging */
- movl %edx,%cr0 /* and let's page NOW! */
-
- pushl $begin /* jump to high virtualized address */
- ret
-
-begin:
- /*
- * Now running relocated at KERNBASE where the system is linked to run.
- *
- * Remove the lowest part of the double mapping of low memory to get
- * some null pointer checks.
- */
- movl $0,PTD
- movl %eax,%cr3 /* invalidate TLB */
+ call pmap_cold
/* set up bootstrap stack */
movl proc0kstack,%eax /* location of in-kernel stack */
@@ -375,7 +253,7 @@
cmpl $0,%esi
je 2f /* No kernelname */
movl $MAXPATHLEN,%ecx /* Brute force!!! */
- movl $R(kernelname),%edi
+ movl $kernelname,%edi
cmpb $'/',(%esi) /* Make sure it starts with a slash */
je 1f
movb $'/',(%edi)
@@ -403,7 +281,7 @@
* Copy the common part of the bootinfo struct
*/
movl %ebx,%esi
- movl $R(bootinfo),%edi
+ movl $bootinfo,%edi
cmpl $BOOTINFO_SIZE,%ecx
jbe got_common_bi_size
movl $BOOTINFO_SIZE,%ecx
@@ -420,12 +298,12 @@
movl BI_NFS_DISKLESS(%ebx),%esi
cmpl $0,%esi
je olddiskboot
- movl $R(nfs_diskless),%edi
+ movl $nfs_diskless,%edi
movl $NFSDISKLESS_SIZE,%ecx
cld
rep
movsb
- movl $R(nfs_diskless_valid),%edi
+ movl $nfs_diskless_valid,%edi
movl $1,(%edi)
#endif
#endif
@@ -438,9 +316,9 @@
*/
olddiskboot:
movl 8(%ebp),%eax
- movl %eax,R(boothowto)
+ movl %eax,boothowto
movl 12(%ebp),%eax
- movl %eax,R(bootdev)
+ movl %eax,bootdev
ret
@@ -478,16 +356,16 @@
divl %ecx
jz trynexgen
popfl
- movl $CPU_386,R(cpu)
+ movl $CPU_386,cpu
jmp 3f
trynexgen:
popfl
- movl $CPU_NX586,R(cpu)
- movl $0x4778654e,R(cpu_vendor) # store vendor string
- movl $0x72446e65,R(cpu_vendor+4)
- movl $0x6e657669,R(cpu_vendor+8)
- movl $0,R(cpu_vendor+12)
+ movl $CPU_NX586,cpu
+ movl $0x4778654e,cpu_vendor # store vendor string
+ movl $0x72446e65,cpu_vendor+4
+ movl $0x6e657669,cpu_vendor+8
+ movl $0,cpu_vendor+12
jmp 3f
try486: /* Try to toggle identification flag; does not exist on early 486s. */
@@ -506,7 +384,7 @@
testl %eax,%eax
jnz trycpuid
- movl $CPU_486,R(cpu)
+ movl $CPU_486,cpu
/*
* Check Cyrix CPU
@@ -533,250 +411,46 @@
* CPU, we couldn't distinguish it from Cyrix's (including IBM
* brand of Cyrix CPUs).
*/
- movl $0x69727943,R(cpu_vendor) # store vendor string
- movl $0x736e4978,R(cpu_vendor+4)
- movl $0x64616574,R(cpu_vendor+8)
+ movl $0x69727943,cpu_vendor # store vendor string
+ movl $0x736e4978,cpu_vendor+4
+ movl $0x64616574,cpu_vendor+8
jmp 3f
trycpuid: /* Use the `cpuid' instruction. */
xorl %eax,%eax
cpuid # cpuid 0
- movl %eax,R(cpu_high) # highest capability
- movl %ebx,R(cpu_vendor) # store vendor string
- movl %edx,R(cpu_vendor+4)
- movl %ecx,R(cpu_vendor+8)
- movb $0,R(cpu_vendor+12)
+ movl %eax,cpu_high # highest capability
+ movl %ebx,cpu_vendor # store vendor string
+ movl %edx,cpu_vendor+4
+ movl %ecx,cpu_vendor+8
+ movb $0,cpu_vendor+12
movl $1,%eax
cpuid # cpuid 1
- movl %eax,R(cpu_id) # store cpu_id
- movl %ebx,R(cpu_procinfo) # store cpu_procinfo
- movl %edx,R(cpu_feature) # store cpu_feature
- movl %ecx,R(cpu_feature2) # store cpu_feature2
+ movl %eax,cpu_id # store cpu_id
+ movl %ebx,cpu_procinfo # store cpu_procinfo
+ movl %edx,cpu_feature # store cpu_feature
+ movl %ecx,cpu_feature2 # store cpu_feature2
rorl $8,%eax # extract family type
andl $15,%eax
cmpl $5,%eax
jae 1f
/* less than Pentium; must be 486 */
- movl $CPU_486,R(cpu)
+ movl $CPU_486,cpu
jmp 3f
1:
/* a Pentium? */
cmpl $5,%eax
jne 2f
- movl $CPU_586,R(cpu)
+ movl $CPU_586,cpu
jmp 3f
2:
/* Greater than Pentium...call it a Pentium Pro */
- movl $CPU_686,R(cpu)
+ movl $CPU_686,cpu
3:
ret
-
-/**********************************************************************
- *
- * Create the first page directory and its page tables.
- *
- */
-
-create_pagetables:
-
-/* Find end of kernel image (rounded up to a page boundary). */
- movl $R(_end),%esi
-
-/* Include symbols, if any. */
- movl R(bootinfo+BI_ESYMTAB),%edi
- testl %edi,%edi
- je over_symalloc
- movl %edi,%esi
- movl $KERNBASE,%edi
- addl %edi,R(bootinfo+BI_SYMTAB)
- addl %edi,R(bootinfo+BI_ESYMTAB)
-over_symalloc:
-
-/* If we are told where the end of the kernel space is, believe it. */
- movl R(bootinfo+BI_KERNEND),%edi
- testl %edi,%edi
- je no_kernend
- movl %edi,%esi
-no_kernend:
-
- addl $PDRMASK,%esi /* Play conservative for now, and */
- andl $~PDRMASK,%esi /* ... round up to PDR boundary */
- movl %esi,R(KERNend) /* save end of kernel */
- movl %esi,R(physfree) /* next free page is at end of kernel */
-
-/* Allocate Kernel Page Tables */
- ALLOCPAGES(NKPT)
- movl %esi,R(KPTphys)
- addl $(KERNBASE-(KPTDI<<(PDRSHIFT-PAGE_SHIFT+PTESHIFT))),%esi
- movl %esi,R(KPTmap)
-
-/* Allocate Page Table Directory */
-#if defined(PAE) || defined(PAE_TABLES)
- /* XXX only need 32 bytes (easier for now) */
- ALLOCPAGES(1)
- movl %esi,R(IdlePDPT)
-#endif
- ALLOCPAGES(NPGPTD)
- movl %esi,R(IdlePTD)
-
-/* Allocate KSTACK */
- ALLOCPAGES(TD0_KSTACK_PAGES)
- movl %esi,R(p0kpa)
- addl $KERNBASE, %esi
- movl %esi, R(proc0kstack)
-
- ALLOCPAGES(1) /* vm86/bios stack */
- movl %esi,R(vm86phystk)
-
- ALLOCPAGES(3) /* pgtable + ext + IOPAGES */
- movl %esi,R(vm86pa)
- addl $KERNBASE, %esi
- movl %esi, R(vm86paddr)
-
-/*
- * Enable PSE and PGE.
- */
-#ifndef DISABLE_PSE
- testl $CPUID_PSE, R(cpu_feature)
- jz 1f
- movl $PG_PS, R(pseflag)
- movl %cr4, %eax
- orl $CR4_PSE, %eax
- movl %eax, %cr4
-1:
-#endif
-#ifndef DISABLE_PG_G
- testl $CPUID_PGE, R(cpu_feature)
- jz 2f
- movl $PG_G, R(pgeflag)
- movl %cr4, %eax
- orl $CR4_PGE, %eax
- movl %eax, %cr4
-2:
-#endif
-
-/*
- * Initialize page table pages mapping physical address zero through the
- * (physical) end of the kernel. Many of these pages must be reserved,
- * and we reserve them all and map them linearly for convenience. We do
- * this even if we've enabled PSE above; we'll just switch the corresponding
- * kernel PDEs before we turn on paging.
- *
- * XXX: We waste some pages here in the PSE case!
- *
- * This and all other page table entries allow read and write access for
- * various reasons. Kernel mappings never have any access restrictions.
- */
- xorl %eax, %eax
- movl R(KERNend),%ecx
- shrl $PAGE_SHIFT,%ecx
- fillkptphys($PG_RW)
-
-/* Map page table pages. */
- movl R(KPTphys),%eax
- movl $NKPT,%ecx
- fillkptphys($PG_RW)
-
-/* Map page directory. */
-#if defined(PAE) || defined(PAE_TABLES)
- movl R(IdlePDPT), %eax
- movl $1, %ecx
- fillkptphys($PG_RW)
-#endif
-
- movl R(IdlePTD), %eax
- movl $NPGPTD, %ecx
- fillkptphys($PG_RW)
-
-/* Map proc0's KSTACK in the physical way ... */
- movl R(p0kpa), %eax
- movl $(TD0_KSTACK_PAGES), %ecx
- fillkptphys($PG_RW)
-
-/* Map ISA hole */
- movl $ISA_HOLE_START, %eax
- movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
- fillkptphys($PG_RW)
-
-/* Map space for the vm86 region */
- movl R(vm86phystk), %eax
- movl $4, %ecx
- fillkptphys($PG_RW)
-
-/* Map page 0 into the vm86 page table */
- movl $0, %eax
- movl $0, %ebx
- movl $1, %ecx
- fillkpt(R(vm86pa), $PG_RW|PG_U)
-
-/* ...likewise for the ISA hole */
- movl $ISA_HOLE_START, %eax
- movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx
- movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
- fillkpt(R(vm86pa), $PG_RW|PG_U)
-
-/*
- * Create an identity mapping for low physical memory, including the kernel.
- * This is only used to map the 2 instructions for jumping to 'begin' in
- * locore (we map everything to avoid having to determine where these
- * instructions are). ACPI resume will transiently restore the first PDE in
- * this mapping (and depend on this PDE's page table created here not being
- * destroyed). See pmap_bootstrap() for more details.
- *
- * Note: There are errata concerning large pages and physical address zero,
- * so a PG_PS mapping should not be used for PDE 0. Our double mapping
- * avoids this automatically by not using PG_PS for PDE #KPDI so that PAT
- * bits can be set at the page level for i/o pages below 1 MB.
- */
- movl R(KPTphys), %eax
- xorl %ebx, %ebx
- movl $NKPT, %ecx
- fillkpt(R(IdlePTD), $PG_RW)
-
-/*
- * Install PDEs for PTs covering enough kva to bootstrap. Then for the PSE
- * case, replace the PDEs whose coverage is strictly within the kernel
- * (between KERNLOAD (rounded up) and KERNend) by large-page PDEs.
- */
- movl R(KPTphys), %eax
- movl $KPTDI, %ebx
- movl $NKPT, %ecx
- fillkpt(R(IdlePTD), $PG_RW)
- cmpl $0,R(pseflag)
- je done_pde
-
- movl R(KERNend), %ecx
- movl $(KERNLOAD + PDRMASK) & ~PDRMASK, %eax
- subl %eax, %ecx
- shrl $PDRSHIFT, %ecx
- movl $KPTDI + ((KERNLOAD + PDRMASK) >> PDRSHIFT), %ebx
- shll $PDESHIFT, %ebx
- addl R(IdlePTD), %ebx
- orl $(PG_V|PG_RW|PG_PS), %eax
-1: movl %eax, (%ebx)
- addl $(1 << PDRSHIFT), %eax
- addl $PDESIZE, %ebx
- loop 1b
-
-done_pde:
-/* install a pde recursively mapping page directory as a page table */
- movl R(IdlePTD), %eax
- movl $PTDPTDI, %ebx
- movl $NPGPTD,%ecx
- fillkpt(R(IdlePTD), $PG_RW)
-
-#if defined(PAE) || defined(PAE_TABLES)
- movl R(IdlePTD), %eax
- xorl %ebx, %ebx
- movl $NPGPTD, %ecx
- fillkpt(R(IdlePDPT), $0x0)
-#endif
-
- ret
-
#ifdef XENHVM
/* Xen Hypercall page */
.text
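
The locore.s hunks above remove the R() relocation macro and the hand-written create_pagetables in favor of pmap_cold() in C (shown in the pmap.c section below). A hedged sketch of why R() became unnecessary, assuming the ldscript change means what the updated comment says (kernbase and the kernel load address are now identical):

/*
 * Old layout: symbols were linked at KERNBASE plus the load offset, so
 * code running before paging had to rebase each one to its physical
 * address by hand:
 */
#define R(foo) ((foo) - KERNBASE)	/* linked (virtual) -> physical */
/*
 * New layout: symbols are linked at the addresses the kernel image
 * physically occupies, so "movl $tmpstk,%esp" is valid before paging
 * is enabled and the macro has nothing left to do.
 */
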
Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c
+++ sys/i386/i386/machdep.c
@@ -1,6 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
+ * Copyright (c) 2018 The FreeBSD Foundation
* Copyright (c) 1992 Terrence R. Lambert.
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
* All rights reserved.
@@ -8,6 +9,9 @@
* This code is derived from software contributed to Berkeley by
* William Jolitz.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -81,9 +85,7 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
-#ifdef SMP
#include <sys/smp.h>
-#endif
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -128,6 +130,7 @@
#include <machine/reg.h>
#include <machine/sigframe.h>
#include <machine/specialreg.h>
+#include <machine/sysarch.h>
#include <machine/trap.h>
#include <machine/vm86.h>
#include <x86/init.h>
@@ -152,8 +155,8 @@
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
-extern register_t init386(int first);
-extern void dblfault_handler(void);
+register_t init386(int first);
+void dblfault_handler(void);
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
@@ -210,14 +213,18 @@
struct mem_range_softc mem_range_softc;
- /* Default init_ops implementation. */
- struct init_ops init_ops = {
+extern char start_exceptions[], end_exceptions[];
+
+extern struct sysentvec elf32_freebsd_sysvec;
+
+/* Default init_ops implementation. */
+struct init_ops init_ops = {
.early_clock_source_init = i8254_init,
.early_delay = i8254_delay,
#ifdef DEV_APIC
.msi_init = msi_init,
#endif
- };
+};
static void
cpu_startup(dummy)
@@ -1098,24 +1105,59 @@
return (EJUSTRETURN);
}
+#ifdef COMPAT_43
+static void
+setup_priv_lcall_gate(struct proc *p)
+{
+ struct i386_ldt_args uap;
+ union descriptor desc;
+ u_int lcall_addr;
+
+ bzero(&uap, sizeof(uap));
+ uap.start = 0;
+ uap.num = 1;
+ lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
+ bzero(&desc, sizeof(desc));
+ desc.sd.sd_type = SDT_MEMERA;
+ desc.sd.sd_dpl = SEL_UPL;
+ desc.sd.sd_p = 1;
+ desc.sd.sd_def32 = 1;
+ desc.sd.sd_gran = 1;
+ desc.sd.sd_lolimit = 0xffff;
+ desc.sd.sd_hilimit = 0xf;
+ desc.sd.sd_lobase = lcall_addr;
+ desc.sd.sd_hibase = lcall_addr >> 24;
+ i386_set_ldt(curthread, &uap, &desc);
+}
+#endif
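
setup_priv_lcall_gate() supplies the COMPAT_43 lcall $7,$0 entry for processes whose sysentvec differs from the default: instead of a kernel call gate, LDT slot 0 becomes a user code segment based at a small trampoline sz_lcall_tramp bytes below ps_strings. A hedged check of the limit encoding used above (seg_size is illustrative, not in the patch):

/*
 * With sd_gran set, the 20-bit limit (sd_hilimit:sd_lolimit =
 * 0xf:0xffff) counts 4K pages, so the segment spans the full 4G
 * starting at lcall_addr.
 */
uint64_t seg_size = (uint64_t)(((0xfu << 16) | 0xffffu) + 1) * PAGE_SIZE;
/* == 1ULL << 32 */
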
+
/*
* Reset registers to default values on exec.
*/
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
- struct trapframe *regs = td->td_frame;
- struct pcb *pcb = td->td_pcb;
+ struct trapframe *regs;
+ struct pcb *pcb;
+
+ regs = td->td_frame;
+ pcb = td->td_pcb;
/* Reset pc->pcb_gs and %gs before possibly invalidating it. */
pcb->pcb_gs = _udatasel;
load_gs(_udatasel);
mtx_lock_spin(&dt_lock);
- if (td->td_proc->p_md.md_ldt)
+ if (td->td_proc->p_md.md_ldt != NULL)
user_ldt_free(td);
else
mtx_unlock_spin(&dt_lock);
+
+#ifdef COMPAT_43
+ if (td->td_proc->p_sysent->sv_psstrings !=
+ elf32_freebsd_sysvec.sv_psstrings)
+ setup_priv_lcall_gate(td->td_proc);
+#endif
/*
* Reset the fs and gs bases. The values from the old address
@@ -1217,18 +1259,22 @@
int _default_ldt;
-union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
-union descriptor ldt[NLDT]; /* local descriptor table */
+struct mtx dt_lock; /* lock for GDT and LDT */
+
+union descriptor gdt0[NGDT]; /* initial global descriptor table */
+union descriptor *gdt = gdt0; /* global descriptor table */
+
+union descriptor *ldt; /* local descriptor table */
+
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
-struct region_descriptor r_gdt, r_idt; /* table descriptors */
-struct mtx dt_lock; /* lock for GDT and LDT */
-static struct i386tss dblfault_tss;
-static char dblfault_stack[PAGE_SIZE];
+static struct i386tss *dblfault_tss;
+static char *dblfault_stack;
-extern vm_offset_t proc0kstack;
+static struct i386tss common_tss0;
+vm_offset_t proc0kstack;
/*
* software prototypes -- in more palatable form.
@@ -1329,8 +1375,8 @@
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GLDT_SEL 10 LDT Descriptor */
-{ .ssd_base = (int) ldt,
- .ssd_limit = sizeof(ldt)-1,
+{ .ssd_base = 0,
+ .ssd_limit = sizeof(union descriptor) * NLDT - 1,
.ssd_type = SDT_SYSLDT,
.ssd_dpl = SEL_UPL,
.ssd_p = 1,
@@ -1338,7 +1384,7 @@
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GUSERLDT_SEL 11 User LDT Descriptor per process */
-{ .ssd_base = (int) ldt,
+{ .ssd_base = 0,
.ssd_limit = (512 * sizeof(union descriptor)-1),
.ssd_type = SDT_SYSLDT,
.ssd_dpl = 0,
@@ -1347,7 +1393,7 @@
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GPANIC_SEL 12 Panic Tss Descriptor */
-{ .ssd_base = (int) &dblfault_tss,
+{ .ssd_base = 0,
.ssd_limit = sizeof(struct i386tss)-1,
.ssd_type = SDT_SYS386TSS,
.ssd_dpl = 0,
@@ -1468,25 +1514,31 @@
.ssd_gran = 1 },
};
+uintptr_t setidt_disp;
+
void
-setidt(idx, func, typ, dpl, selec)
- int idx;
- inthand_t *func;
- int typ;
- int dpl;
- int selec;
+setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
+{
+ uintptr_t off;
+
+ off = func != NULL ? (uintptr_t)func + setidt_disp : 0;
+ setidt_nodisp(idx, off, typ, dpl, selec);
+}
+
+void
+setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
{
struct gate_descriptor *ip;
ip = idt + idx;
- ip->gd_looffset = (int)func;
+ ip->gd_looffset = off;
ip->gd_selector = selec;
ip->gd_stkcpy = 0;
ip->gd_xx = 0;
ip->gd_type = typ;
ip->gd_dpl = dpl;
ip->gd_p = 1;
- ip->gd_hioffset = ((int)func)>>16 ;
+ ip->gd_hioffset = ((u_int)off) >> 16 ;
}
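
With the exception handlers executed from a copy in the trampoline region, setidt() now biases every handler address by setidt_disp, and the debugger hunks subtract it again to get back to symbol space. Hedged helper sketches of the two directions (these functions are illustrative, not part of the patch):

/* linked handler address -> offset stored in the gate descriptor */
static __inline uintptr_t
idt_runtime_off(uintptr_t handler)
{
	return (handler != 0 ? handler + setidt_disp : 0);
}

/* gate descriptor offset -> linked address, as ddb's "show idt" does */
static __inline uintptr_t
idt_symbol_addr(uintptr_t off)
{
	return (off >= PMAP_TRM_MIN_ADDRESS ? off - setidt_disp : off);
}
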
extern inthand_t
@@ -1501,7 +1553,7 @@
#ifdef XENHVM
IDTVEC(xen_intr_upcall),
#endif
- IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
+ IDTVEC(int0x80_syscall);
#ifdef DDB
/*
@@ -1512,15 +1564,29 @@
{
struct gate_descriptor *ip;
int idx;
- uintptr_t func;
+ uintptr_t func, func_trm;
+ bool trm;
ip = idt;
for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
- func = (ip->gd_hioffset << 16 | ip->gd_looffset);
- if (func != (uintptr_t)&IDTVEC(rsvd)) {
- db_printf("%3d\t", idx);
- db_printsym(func, DB_STGY_PROC);
- db_printf("\n");
+ if (ip->gd_type == SDT_SYSTASKGT) {
+ db_printf("%3d\t<TASK>\n", idx);
+ } else {
+ func = (ip->gd_hioffset << 16 | ip->gd_looffset);
+ if (func >= PMAP_TRM_MIN_ADDRESS) {
+ func_trm = func;
+ func -= setidt_disp;
+ trm = true;
+ } else
+ trm = false;
+ if (func != (uintptr_t)&IDTVEC(rsvd)) {
+ db_printf("%3d\t", idx);
+ db_printsym(func, DB_STGY_PROC);
+ if (trm)
+ db_printf(" (trampoline %#x)",
+ func_trm);
+ db_printf("\n");
+ }
}
ip++;
}
@@ -1567,6 +1633,24 @@
db_printf("dr6\t0x%08x\n", rdr6());
db_printf("dr7\t0x%08x\n", rdr7());
}
+
+DB_SHOW_COMMAND(frame, db_show_frame)
+{
+ struct trapframe *frame;
+
+ frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
+ printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
+ frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
+ frame->tf_eip);
+ printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
+ printf("ds %#x es %#x fs %#x\n",
+ frame->tf_ds, frame->tf_es, frame->tf_fs);
+ printf("eax %#x ecx %#x edx %#x ebx %#x\n",
+ frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
+ printf("ebp %#x esi %#x edi %#x\n",
+ frame->tf_ebp, frame->tf_esi, frame->tf_edi);
+
+}
#endif
void
@@ -1693,7 +1777,6 @@
static void
basemem_setup(void)
{
- vm_paddr_t pa;
pt_entry_t *pte;
int i;
@@ -1703,30 +1786,6 @@
basemem = 640;
}
- /*
- * XXX if biosbasemem is now < 640, there is a `hole'
- * between the end of base memory and the start of
- * ISA memory. The hole may be empty or it may
- * contain BIOS code or data. Map it read/write so
- * that the BIOS can write to it. (Memory from 0 to
- * the physical end of the kernel is mapped read-only
- * to begin with and then parts of it are remapped.
- * The parts that aren't remapped form holes that
- * remain read-only and are unused by the kernel.
- * The base memory area is below the physical end of
- * the kernel and right now forms a read-only hole.
- * The part of it from PAGE_SIZE to
- * (trunc_page(biosbasemem * 1024) - 1) will be
- * remapped and used by the kernel later.)
- *
- * This code is similar to the code used in
- * pmap_mapdev, but since no memory needs to be
- * allocated we simply change the mapping.
- */
- for (pa = trunc_page(basemem * 1024);
- pa < ISA_HOLE_START; pa += PAGE_SIZE)
- pmap_kenter(KERNBASE + pa, pa);
-
/*
* Map pages between basemem and ISA_HOLE_START, if any, r/w into
* the vm86 page table so that vm86 can scribble on them using
@@ -1807,9 +1866,8 @@
* the kernel page table so we can use it as a buffer. The
* kernel will unmap this page later.
*/
- pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
vmc.npages = 0;
- smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
+ smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
@@ -2130,13 +2188,119 @@
#endif
}
+static void
+fixup_idt(void)
+{
+ struct gate_descriptor *ip;
+ uintptr_t off;
+ int x;
+
+ for (x = 0; x < NIDT; x++) {
+ ip = &idt[x];
+ if (ip->gd_type != SDT_SYS386IGT &&
+ ip->gd_type != SDT_SYS386TGT)
+ continue;
+ off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
+ KASSERT(off >= (uintptr_t)start_exceptions &&
+ off < (uintptr_t)end_exceptions,
+ ("IDT[%d] type %d off %#x", x, ip->gd_type, off));
+ off += setidt_disp;
+ MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
+ off < PMAP_TRM_MAX_ADDRESS);
+ ip->gd_looffset = off;
+ ip->gd_hioffset = off >> 16;
+ }
+}
+
+static void
+i386_setidt1(void)
+{
+ int x;
+
+ /* exceptions */
+ for (x = 0; x < NIDT; x++)
+ setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
+ SEL_KPL));
+ setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
+ SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
+ SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
+#ifdef KDTRACE_HOOKS
+ setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
+ SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+#ifdef XENHVM
+ setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+}
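
Relative to the block it replaces in init386() (removed further down), i386_setidt1() also flips most vectors from trap gates to interrupt gates. A hedged reading of the change:

/*
 * SDT_SYS386IGT clears IF on entry; SDT_SYS386TGT does not.  Kernel
 * entry now runs through trampoline code that must switch CR3 off the
 * user page tables first, and a nested interrupt taken before that
 * switch would execute on the wrong address space, so masking
 * interrupts until the entry sequence finishes presumably closes that
 * window.  IDT_MF is the one vector left with the trap-gate type.
 */
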
+
+static void
+i386_setidt2(void)
+{
+
+ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
+}
+
+#if defined(DEV_ISA) && !defined(DEV_ATPIC)
+static void
+i386_setidt3(void)
+{
+
+ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint),
+ SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+}
+#endif
+
register_t
init386(int first)
{
- struct gate_descriptor *gdp;
+ struct region_descriptor r_gdt, r_idt; /* table descriptors */
int gsel_tss, metadata_missing, x, pa;
struct pcpu *pc;
struct xstate_hdr *xhdr;
+ vm_offset_t addend;
int late_console;
thread0.td_kstack = proc0kstack;
@@ -2148,18 +2312,23 @@
*/
proc_linkup0(&proc0, &thread0);
- metadata_missing = 0;
if (bootinfo.bi_modulep) {
- preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
- preload_bootstrap_relocate(KERNBASE);
+ metadata_missing = 0;
+ addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
+ PMAP_MAP_LOW : 0;
+ preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
+ preload_bootstrap_relocate(addend);
} else {
metadata_missing = 1;
}
- if (bootinfo.bi_envp != 0)
- init_static_kenv((char *)bootinfo.bi_envp + KERNBASE, 0);
- else
+ if (bootinfo.bi_envp != 0) {
+ addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
+ PMAP_MAP_LOW : 0;
+ init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
+ } else {
init_static_kenv(NULL, 0);
+ }
identify_hypervisor();
@@ -2179,21 +2348,21 @@
pc = &__pcpu[0];
gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+ gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
+ gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;
for (x = 0; x < NGDT; x++)
- ssdtosd(&gdt_segs[x], &gdt[x].sd);
+ ssdtosd(&gdt_segs[x], &gdt0[x].sd);
- r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
- r_gdt.rd_base = (int) gdt;
+ r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
+ r_gdt.rd_base = (int)gdt0;
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
lgdt(&r_gdt);
pcpu_init(pc, 0, sizeof(struct pcpu));
for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
- pmap_kenter(pa + KERNBASE, pa);
- dpcpu_init((void *)(first + KERNBASE), 0);
+ pmap_kenter(pa, pa);
+ dpcpu_init((void *)first, 0);
first += DPCPU_SIZE;
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
@@ -2210,67 +2379,7 @@
mutex_init();
mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
- /* make ldt memory segments */
- ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
- ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
- for (x = 0; x < nitems(ldt_segs); x++)
- ssdtosd(&ldt_segs[x], &ldt[x].sd);
-
- _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
- lldt(_default_ldt);
- PCPU_SET(currentldt, _default_ldt);
-
- /* exceptions */
- for (x = 0; x < NIDT; x++)
- setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL
- , GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
- setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#ifdef KDTRACE_HOOKS
- setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#endif
-#ifdef XENHVM
- setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
-#endif
+ i386_setidt1();
r_idt.rd_limit = sizeof(idt0) - 1;
r_idt.rd_base = (int) idt;
@@ -2283,41 +2392,21 @@
clock_init();
finishidentcpu(); /* Final stage of CPU initialization */
- setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
+ i386_setidt2();
initializecpu(); /* Initialize CPU registers */
initializecpucache();
/* pointer to selector slot for %fs/%gs */
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
- dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
- dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
- dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
- dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
-#if defined(PAE) || defined(PAE_TABLES)
- dblfault_tss.tss_cr3 = (int)IdlePDPT;
-#else
- dblfault_tss.tss_cr3 = (int)IdlePTD;
-#endif
- dblfault_tss.tss_eip = (int)dblfault_handler;
- dblfault_tss.tss_eflags = PSL_KERNEL;
- dblfault_tss.tss_ds = dblfault_tss.tss_es =
- dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
- dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
- dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
- dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
-
/* Initialize the tss (except for the final esp0) early for vm86. */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- thread0.td_kstack_pages * PAGE_SIZE - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
+ PAGE_SIZE - VM86_STACK_SPACE;
+ common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+ common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
ltr(gsel_tss);
/* Initialize the PIC early for vm86 calls. */
@@ -2333,10 +2422,7 @@
* Point the ICU spurious interrupt vectors at the APIC spurious
* interrupt handler.
*/
- setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
- setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
- GSEL(GCODE_SEL, SEL_KPL));
+ i386_setidt3();
#endif
#endif
@@ -2386,22 +2472,11 @@
PCPU_SET(curpcb, thread0.td_pcb);
/* Move esp0 in the tss to its final place. */
/* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+ common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
+ PCPU_SET(kesp0, common_tss0.tss_esp0);
gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */
ltr(gsel_tss);
- /* make a call gate to reenter kernel with */
- gdp = &ldt[LSYS5CALLS_SEL].gd;
-
- x = (int) &IDTVEC(lcall_syscall);
- gdp->gd_looffset = x;
- gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
- gdp->gd_stkcpy = 1;
- gdp->gd_type = SDT_SYS386CGT;
- gdp->gd_dpl = SEL_UPL;
- gdp->gd_p = 1;
- gdp->gd_hioffset = x >> 16;
-
/* transfer to user mode */
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -2427,6 +2502,133 @@
return ((register_t)thread0.td_pcb);
}
+extern u_int tramp_idleptd;
+
+static void
+machdep_init_trampoline(void)
+{
+ struct region_descriptor r_gdt, r_idt;
+ struct i386tss *tss;
+ char *copyout_buf, *trampoline, *tramp_stack_base;
+ u_int *tramp_idleptd_reloced;
+ int x;
+
+ gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
+ M_NOWAIT | M_ZERO);
+ bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
+ r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
+ r_gdt.rd_base = (int)gdt;
+ lgdt(&r_gdt);
+
+ tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
+ M_NOWAIT | M_ZERO);
+ bcopy(&common_tss0, tss, sizeof(struct i386tss));
+ gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
+ gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
+ gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
+ ltr(GSEL(GPROC0_SEL, SEL_KPL));
+
+ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+ PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
+ PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+ PCPU_SET(common_tssp, tss);
+
+ trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
+ M_NOWAIT);
+ bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
+ tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
+ PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
+ VM86_STACK_SPACE);
+ tss[0].tss_esp0 = PCPU_GET(trampstk);
+
+ idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
+ bcopy(idt0, idt, sizeof(idt0));
+
+ /* Re-initialize new IDT since the handlers were relocated */
+ setidt_disp = trampoline - start_exceptions;
+ fixup_idt();
+
+ tramp_idleptd_reloced = (u_int *)((uintptr_t)&tramp_idleptd +
+ setidt_disp);
+#if defined(PAE) || defined(PAE_TABLES)
+ *tramp_idleptd_reloced = (u_int)IdlePDPT;
+#else
+ *tramp_idleptd_reloced = (u_int)IdlePTD;
+#endif
+
+ r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
+ r_idt.rd_base = (int)idt;
+ lidt(&r_idt);
+
+ /* dblfault TSS */
+ dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
+ dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
+ dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
+ dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
+ (int)dblfault_stack + PAGE_SIZE;
+ dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
+ dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#if defined(PAE) || defined(PAE_TABLES)
+ dblfault_tss->tss_cr3 = (int)IdlePDPT;
+#else
+ dblfault_tss->tss_cr3 = (int)IdlePTD;
+#endif
+ dblfault_tss->tss_eip = (int)dblfault_handler;
+ dblfault_tss->tss_eflags = PSL_KERNEL;
+ dblfault_tss->tss_ds = dblfault_tss->tss_es =
+ dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+ dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+ dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+ dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
+ gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;
+
+ /* make ldt memory segments */
+ ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
+ M_NOWAIT | M_ZERO);
+ gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
+ gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
+ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
+ ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
+ for (x = 0; x < nitems(ldt_segs); x++)
+ ssdtosd(&ldt_segs[x], &ldt[x].sd);
+
+ _default_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ lldt(_default_ldt);
+ PCPU_SET(currentldt, _default_ldt);
+
+ copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
+ PCPU_SET(copyout_buf, copyout_buf);
+ copyout_init_tramp();
+}
+SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
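
machdep_init_trampoline() relocates the GDT, TSS, IDT, double-fault state, LDT, and the per-CPU copyout buffer into pmap_trm_alloc()ed memory, which stays mapped no matter which process page tables are active. The repeated lobase/hibase stores above encode the split base field of an i386 descriptor; a hedged helper showing the encoding (set_desc_base() is illustrative only):

/* i386 segment descriptors carry their 32-bit base split 24/8. */
static void
set_desc_base(struct segment_descriptor *sd, u_int base)
{
	sd->sd_lobase = base & 0xffffff;	/* bits 0..23 */
	sd->sd_hibase = base >> 24;		/* bits 24..31 */
}
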
+
+#ifdef COMPAT_43
+static void
+i386_setup_lcall_gate(void)
+{
+ struct sysentvec *sv;
+ struct user_segment_descriptor desc;
+ u_int lcall_addr;
+
+ sv = &elf32_freebsd_sysvec;
+ lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;
+
+ bzero(&desc, sizeof(desc));
+ desc.sd_type = SDT_MEMERA;
+ desc.sd_dpl = SEL_UPL;
+ desc.sd_p = 1;
+ desc.sd_def32 = 1;
+ desc.sd_gran = 1;
+ desc.sd_lolimit = 0xffff;
+ desc.sd_hilimit = 0xf;
+ desc.sd_lobase = lcall_addr;
+ desc.sd_hibase = lcall_addr >> 24;
+ bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
+}
+SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
+#endif
+
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{
@@ -2507,6 +2709,7 @@
static void
f00f_hack(void *unused)
{
+ struct region_descriptor r_idt;
struct gate_descriptor *new_idt;
vm_offset_t tmp;
@@ -2517,16 +2720,19 @@
printf("Intel Pentium detected, installing workaround for F00F bug\n");
- tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO);
+ tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
if (tmp == 0)
panic("kmem_malloc returned 0");
+ tmp = round_page(tmp);
/* Put the problematic entry (#6) at the end of the lower page. */
- new_idt = (struct gate_descriptor*)
+ new_idt = (struct gate_descriptor *)
(tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
bcopy(idt, new_idt, sizeof(idt0));
r_idt.rd_base = (u_int)new_idt;
+ r_idt.rd_limit = sizeof(idt0) - 1;
lidt(&r_idt);
+ /* SMP machines do not need the F00F hack. */
idt = new_idt;
pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
}
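
The F00F workaround now takes its replacement IDT from the trampoline arena as well, since the IDT must stay mapped under every CR3. The three-page request plus round_page() stands in for an alignment guarantee; a hedged check of the resulting layout:

/*
 * After rounding, two whole pages remain, and descriptor #6 (the
 * vector the erratum abuses) ends exactly at the first page boundary,
 * so mapping [tmp, tmp + PAGE_SIZE) read-only makes the descriptor
 * fetch fault.  An assertion that would capture this (illustrative):
 */
MPASS((char *)&new_idt[7] == (char *)tmp + PAGE_SIZE);
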
Index: sys/i386/i386/mem.c
===================================================================
--- sys/i386/i386/mem.c
+++ sys/i386/i386/mem.c
@@ -92,9 +92,6 @@
return EIO;
if (dev2unit(dev) == CDEV_MINOR_KMEM && uio->uio_resid > 0) {
- if (uio->uio_offset < (vm_offset_t)VADDR(PTDPTDI, 0))
- return (EFAULT);
-
if (!kernacc((caddr_t)(int)uio->uio_offset, uio->uio_resid,
uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE))
return (EFAULT);
Index: sys/i386/i386/minidump_machdep.c
===================================================================
--- sys/i386/i386/minidump_machdep.c
+++ sys/i386/i386/minidump_machdep.c
@@ -190,7 +190,7 @@
* page written corresponds to 2MB of space
*/
ptesize += PAGE_SIZE;
- pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
+ pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
@@ -281,7 +281,7 @@
/* Dump kernel page table pages */
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/* We always write a page, even if it is zero */
- pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
+ pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is a single 2M block. Generate a fake PTP */
Index: sys/i386/i386/mp_machdep.c
===================================================================
--- sys/i386/i386/mp_machdep.c
+++ sys/i386/i386/mp_machdep.c
@@ -83,8 +83,8 @@
#include <machine/cpu.h>
#define WARMBOOT_TARGET 0
-#define WARMBOOT_OFF (KERNBASE + 0x0467)
-#define WARMBOOT_SEG (KERNBASE + 0x0469)
+#define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467)
+#define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469)
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
@@ -139,6 +139,8 @@
static int start_all_aps(void);
static int start_ap(int apic_id);
+static char *ap_copyout_buf;
+static char *ap_tramp_stack_base;
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -207,10 +209,10 @@
init_secondary(void)
{
struct pcpu *pc;
- vm_offset_t addr;
- int gsel_tss;
- int x, myid;
- u_int cr0;
+ struct i386tss *common_tssp;
+ struct region_descriptor r_gdt, r_idt;
+ int gsel_tss, myid, x;
+ u_int cr0;
/* bootAP is set in start_ap() to our ID. */
myid = bootAP;
@@ -224,11 +226,13 @@
pc->pc_apic_id = cpu_apic_ids[myid];
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
+ pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid];
fix_cpuid();
- gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
- gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+ gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
+ gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp;
+ gdt_segs[GLDT_SEL].ssd_base = (int)ldt;
for (x = 0; x < NGDT; x++) {
ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
@@ -238,21 +242,27 @@
r_gdt.rd_base = (int) &gdt[myid * NGDT];
lgdt(&r_gdt); /* does magic intra-segment return */
+ r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
+ r_idt.rd_base = (int)idt;
lidt(&r_idt);
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
+ PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ -
+ VM86_STACK_SPACE);
+
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
- PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+ common_tssp->tss_esp0 = PCPU_GET(trampstk);
+ common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
+ common_tssp->tss_ioopt = sizeof(struct i386tss) << 16;
PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
ltr(gsel_tss);
PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);
+ PCPU_SET(copyout_buf, ap_copyout_buf);
/*
* Set to a known state:
@@ -274,8 +284,6 @@
/* BSP may have changed PTD while we were waiting */
invltlb();
- for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
- invlpg(addr);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@@ -287,17 +295,20 @@
/*
* start each AP in our list
*/
-/* Lowest 1MB is already mapped: don't touch*/
#define TMPMAP_START 1
static int
start_all_aps(void)
{
u_char mpbiosreason;
u_int32_t mpbioswarmvec;
- int apic_id, cpu, i;
+ int apic_id, cpu;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
+ /* Remap lowest 1MB */
+ IdlePTD[0] = IdlePTD[1];
+ load_cr3(rcr3()); /* invalidate TLB */
+
/* install the AP 1st level boot code */
install_ap_tramp();
@@ -306,11 +317,7 @@
outb(CMOS_REG, BIOS_RESET);
mpbiosreason = inb(CMOS_DATA);
- /* set up temporary P==V mapping for AP boot */
- /* XXX this is a hack, we should boot the AP on its own stack/PTD */
- for (i = TMPMAP_START; i < NKPT; i++)
- PTD[i] = PTD[KPTDI + i];
- invltlb();
+ /* take advantage of the P==V mapping for PTD[0] for AP boot */
/* start each AP */
for (cpu = 1; cpu < mp_ncpus; cpu++) {
@@ -332,6 +339,9 @@
PAGE_SIZE - 4;
bootAP = cpu;
+ ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
+ ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
+
/* attempt to start the Application Processor */
CHECK_INIT(99); /* setup checkpoints */
if (!start_ap(apic_id)) {
@@ -347,17 +357,16 @@
CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
+ /* Unmap lowest 1MB again */
+ IdlePTD[0] = 0;
+ load_cr3(rcr3());
+
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
- /* Undo V==P hack from above */
- for (i = TMPMAP_START; i < NKPT; i++)
- PTD[i] = 0;
- pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
-
/* number of APs actually started */
return mp_naps;
}
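
start_all_aps() replaces the old NKPT-wide V==P PDE copy with a single transient alias. PTD slot 1 already maps physical low memory permanently (the double map built by pmap_cold(), pinned by the _Static_assert in the pmap.c section below), so copying it into slot 0 is all the real-mode AP bootstrap needs. A hedged picture, assuming PMAP_MAP_LOW == NBPDR as the WARMBOOT_* hunk above suggests:

/*
 * While APs start:
 *   PTD[0]: VA [0, NBPDR)       -> PA [0, NBPDR)  transient, real-mode entry
 *   PTD[1]: VA [NBPDR, 2*NBPDR) -> PA [0, NBPDR)  permanent PMAP_MAP_LOW alias
 *
 * PTD[0] is zeroed again afterwards so NULL dereferences keep faulting.
 */
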
@@ -379,7 +388,7 @@
{
int x;
int size = *(int *) ((u_long) & bootMP_size);
- vm_offset_t va = boot_address + KERNBASE;
+ vm_offset_t va = boot_address;
u_char *src = (u_char *) ((u_long) bootMP);
u_char *dst = (u_char *) va;
u_int boot_base = (u_int) bootMP;
@@ -409,7 +418,7 @@
/* modify the ljmp target for MPentry() */
dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
- *dst32 = ((u_int) MPentry - KERNBASE);
+ *dst32 = (u_int)MPentry;
/* modify the target for boot code segment */
dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
Index: sys/i386/i386/mpboot.s
===================================================================
--- sys/i386/i386/mpboot.s
+++ sys/i386/i386/mpboot.s
@@ -37,8 +37,6 @@
#include "assym.inc"
-#define R(x) ((x)-KERNBASE)
-
/*
* this code MUST be enabled here and in mp_machdep.c
* it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -80,18 +78,14 @@
movl $1,%eax
cpuid /* Retrieve features */
movl %cr4,%eax
-#ifndef DISABLE_PSE
testl $CPUID_PSE,%edx
jz 1f
orl $CR4_PSE,%eax /* Enable PSE */
1:
-#endif
-#ifndef DISABLE_PG_G
testl $CPUID_PGE,%edx
jz 1f
orl $CR4_PGE,%eax /* Enable PGE */
1:
-#endif
testl $CPUID_VME,%edx
jz 1f
orl $CR4_VME,%eax /* Enable VME */
@@ -100,13 +94,13 @@
/* Now enable paging mode */
#if defined(PAE) || defined(PAE_TABLES)
- movl R(IdlePDPT), %eax
+ movl IdlePDPT, %eax
movl %eax, %cr3
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
#else
- movl R(IdlePTD), %eax
+ movl IdlePTD, %eax
movl %eax,%cr3
#endif
movl %cr0,%eax
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -47,6 +47,8 @@
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
* All rights reserved.
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
* Safeport Network Services, and Network Associates Laboratories, the
@@ -54,6 +56,10 @@
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
* CHATS research program.
*
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -121,6 +127,7 @@
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
+#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -141,6 +148,7 @@
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#endif
+#include <machine/bootinfo.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -190,9 +198,6 @@
#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
struct pmap kernel_pmap_store;
-LIST_HEAD(pmaplist, pmap);
-static struct pmaplist allpmaps;
-static struct mtx allpmaps_lock;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
@@ -200,9 +205,7 @@
int pseflag = 0; /* PG_PS or-in */
static int nkpt = NKPT;
-vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR;
-extern u_int32_t KERNend;
-extern u_int32_t KPTphys;
+vm_offset_t kernel_vm_end = /* 0 + */ NKPT * NBPDR;
#if defined(PAE) || defined(PAE_TABLES)
pt_entry_t pg_nx;
@@ -343,29 +346,213 @@
static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain,
uint8_t *flags, int wait);
#endif
-static void pmap_set_pg(void);
+static void pmap_init_trm(void);
static __inline void pagezero(void *page);
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
+void pmap_cold(void);
+extern char _end[];
+u_long physfree; /* phys addr of next free page */
+u_long vm86phystk; /* PA of vm86/bios stack */
+u_long vm86paddr; /* address of vm86 region */
+int vm86pa; /* phys addr of vm86 region */
+u_long KERNend; /* phys addr end of kernel (just after bss) */
+pd_entry_t *IdlePTD; /* phys addr of kernel PTD */
+#if defined(PAE) || defined(PAE_TABLES)
+pdpt_entry_t *IdlePDPT; /* phys addr of kernel PDPT */
+#endif
+pt_entry_t *KPTmap; /* address of kernel page tables */
+u_long KPTphys; /* phys addr of kernel page tables */
+
+static u_long
+allocpages(u_int cnt, u_long *physfree)
+{
+ u_long res;
+
+ res = *physfree;
+ *physfree += PAGE_SIZE * cnt;
+ bzero((void *)res, PAGE_SIZE * cnt);
+ return (res);
+}
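
allocpages() is a bump allocator over physical pages, usable this early because pmap_cold() runs with paging disabled and PA == VA. A hedged usage sketch:

u_long pa;
pt_entry_t *pt;

pa = allocpages(NKPT, &physfree);	/* NKPT zeroed, page-aligned pages */
pt = (pt_entry_t *)pa;			/* valid only while PA == VA holds */
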
+
+static void
+pmap_cold_map(u_long pa, u_long va, u_long cnt)
+{
+ pt_entry_t *pt;
+
+ for (pt = (pt_entry_t *)KPTphys + atop(va); cnt > 0;
+ cnt--, pt++, va += PAGE_SIZE, pa += PAGE_SIZE)
+ *pt = pa | PG_V | PG_RW | PG_A | PG_M;
+}
+
+static void
+pmap_cold_mapident(u_long pa, u_long cnt)
+{
+
+ pmap_cold_map(pa, pa, cnt);
+}
+
+_Static_assert(2 * NBPDR == KERNBASE, "Broken double-map of zero PTD");
+
/*
- * If you get an error here, then you set KVA_PAGES wrong! See the
- * description of KVA_PAGES in sys/i386/include/pmap.h. It must be
- * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE.
+ * Called from locore.s before paging is enabled. Sets up the first
+ * kernel page table. Since kernel is mapped with PA == VA, this code
+ * does not require relocations.
*/
-CTASSERT(KERNBASE % (1 << 24) == 0);
+void
+pmap_cold(void)
+{
+ pt_entry_t *pt;
+ u_long a;
+ u_int cr3, ncr4;
+
+ physfree = (u_long)&_end;
+ if (bootinfo.bi_esymtab != 0)
+ physfree = bootinfo.bi_esymtab;
+ if (bootinfo.bi_kernend != 0)
+ physfree = bootinfo.bi_kernend;
+ physfree = roundup2(physfree, NBPDR);
+ KERNend = physfree;
+
+ /* Allocate Kernel Page Tables */
+ KPTphys = allocpages(NKPT, &physfree);
+ KPTmap = (pt_entry_t *)KPTphys;
+
+ /* Allocate Page Table Directory */
+#if defined(PAE) || defined(PAE_TABLES)
+ /* XXX only need 32 bytes (easier for now) */
+ IdlePDPT = (pdpt_entry_t *)allocpages(1, &physfree);
+#endif
+ IdlePTD = (pd_entry_t *)allocpages(NPGPTD, &physfree);
+
+ /*
+ * Allocate KSTACK. Leave a guard page between IdlePTD and
+ * proc0kstack, to control stack overflow for thread0 and
+ * prevent corruption of the page table. We leak the guard
+ * physical memory due to 1:1 mappings.
+ */
+ allocpages(1, &physfree);
+ proc0kstack = allocpages(TD0_KSTACK_PAGES, &physfree);
+
+ /* vm86/bios stack */
+ vm86phystk = allocpages(1, &physfree);
+
+ /* pgtable + ext + IOPAGES */
+ vm86paddr = vm86pa = allocpages(3, &physfree);
+
+ /* Install page tables into PTD. Page table page 1 is wasted. */
+ for (a = 0; a < NKPT; a++)
+ IdlePTD[a] = (KPTphys + ptoa(a)) | PG_V | PG_RW | PG_A | PG_M;
+
+#if defined(PAE) || defined(PAE_TABLES)
+ /* PAE install PTD pointers into PDPT */
+ for (a = 0; a < NPGPTD; a++)
+ IdlePDPT[a] = ((u_int)IdlePTD + ptoa(a)) | PG_V;
+#endif
+
+ /*
+ * Install recursive mapping for kernel page tables into
+ * itself.
+ */
+ for (a = 0; a < NPGPTD; a++)
+ IdlePTD[PTDPTDI + a] = ((u_int)IdlePTD + ptoa(a)) | PG_V |
+ PG_RW;
+
+ /*
+ * Initialize page table pages mapping physical address zero
+ * through the (physical) end of the kernel. Many of these
+ * pages must be reserved, and we reserve them all and map
+ * them linearly for convenience. We do this even if we've
+ * enabled PSE above; we'll just switch the corresponding
+ * kernel PDEs before we turn on paging.
+ *
+ * This and all other page table entries allow read and write
+ * access for various reasons. Kernel mappings never have any
+ * access restrictions.
+ */
+ pmap_cold_mapident(0, atop(NBPDR));
+ pmap_cold_map(0, NBPDR, atop(NBPDR));
+ pmap_cold_mapident(KERNBASE, atop(KERNend - KERNBASE));
+
+ /* Map page table directory */
+#if defined(PAE) || defined(PAE_TABLES)
+ pmap_cold_mapident((u_long)IdlePDPT, 1);
+#endif
+ pmap_cold_mapident((u_long)IdlePTD, NPGPTD);
+
+ /* Map early KPTmap. It is really pmap_cold_mapident. */
+ pmap_cold_map(KPTphys, (u_long)KPTmap, NKPT);
+
+ /* Map proc0kstack */
+ pmap_cold_mapident(proc0kstack, TD0_KSTACK_PAGES);
+ /* ISA hole already mapped */
+
+ pmap_cold_mapident(vm86phystk, 1);
+ pmap_cold_mapident(vm86pa, 3);
+
+ /* Map page 0 into the vm86 page table */
+ *(pt_entry_t *)vm86pa = 0 | PG_RW | PG_U | PG_A | PG_M | PG_V;
+
+ /* ...likewise for the ISA hole for vm86 */
+ for (pt = (pt_entry_t *)vm86pa + atop(ISA_HOLE_START), a = 0;
+ a < atop(ISA_HOLE_LENGTH); a++, pt++)
+ *pt = (ISA_HOLE_START + ptoa(a)) | PG_RW | PG_U | PG_A |
+ PG_M | PG_V;
+
+ /* Enable PSE, PGE, VME, and PAE if configured. */
+ ncr4 = 0;
+ if ((cpu_feature & CPUID_PSE) != 0) {
+ ncr4 |= CR4_PSE;
+ /*
+ * Superpage mapping of the kernel text. Existing 4k
+ * page table pages are wasted.
+ */
+ for (a = KERNBASE; a < KERNend; a += NBPDR)
+ IdlePTD[a >> PDRSHIFT] = a | PG_PS | PG_A | PG_M |
+ PG_RW | PG_V;
+ }
+ if ((cpu_feature & CPUID_PGE) != 0) {
+ ncr4 |= CR4_PGE;
+ pgeflag = PG_G;
+ }
+ ncr4 |= (cpu_feature & CPUID_VME) != 0 ? CR4_VME : 0;
+#if defined(PAE) || defined(PAE_TABLES)
+ ncr4 |= CR4_PAE;
+#endif
+ if (ncr4 != 0)
+ load_cr4(rcr4() | ncr4);
+
+ /* Now enable paging */
+#if defined(PAE) || defined(PAE_TABLES)
+ cr3 = (u_int)IdlePDPT;
+#else
+ cr3 = (u_int)IdlePTD;
+#endif
+ load_cr3(cr3);
+ load_cr0(rcr0() | CR0_PG);
+
+ /*
+ * Now running relocated at KERNBASE where the system is
+ * linked to run.
+ */
+
+ /*
+ * Remove the lowest part of the double mapping of low memory
+ * to get some null pointer checks.
+ */
+ IdlePTD[0] = 0;
+ load_cr3(cr3); /* invalidate TLB */
+}
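
pmap_cold() returns with the kernel already running on its final page tables. A hedged summary of the layout it leaves behind (again assuming PMAP_MAP_LOW == NBPDR):

/*
 *   VA [0, NBPDR)          -> PA [0, NBPDR)   removed at the end (NULL traps)
 *   VA [NBPDR, 2*NBPDR)    -> PA [0, NBPDR)   PMAP_MAP_LOW alias, kept
 *   VA [KERNBASE, KERNend) -> PA identical    kernel text/data, superpages
 *                                             when PSE is available
 *
 * The _Static_assert above pins KERNBASE to 2 * NBPDR, so the two
 * low-memory PDEs sit immediately below the kernel itself.
 */
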
/*
* Bootstrap the system enough to run with virtual memory.
*
* On the i386 this is called after mapping has already been enabled
+ * in locore.s with the page table created in pmap_cold(),
* and just syncs the pmap module with what has already been done.
- * [We can't call it easily with mapping off since the kernel is not
- * mapped with PA == VA, hence we would have to relocate every address
- * from the linked base (virtual) address "KERNBASE" to the actual
- * (physical) address starting relative to 0]
*/
void
pmap_bootstrap(vm_paddr_t firstaddr)
@@ -391,7 +578,7 @@
* page that it allocated. Preferably, locore would provide a first
* unused virtual address in addition to "firstaddr".
*/
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
+ virtual_avail = (vm_offset_t)firstaddr;
virtual_end = VM_MAX_KERNEL_ADDRESS;
@@ -399,9 +586,9 @@
* Initialize the kernel pmap (which is statically allocated).
*/
PMAP_LOCK_INIT(kernel_pmap);
- kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
+ kernel_pmap->pm_pdir = IdlePTD;
#if defined(PAE) || defined(PAE_TABLES)
- kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
+ kernel_pmap->pm_pdpt = IdlePDPT;
#endif
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
@@ -411,19 +598,6 @@
*/
rw_init(&pvh_global_lock, "pmap pv global");
- LIST_INIT(&allpmaps);
-
- /*
- * Request a spin mutex so that changes to allpmaps cannot be
- * preempted by smp_rendezvous_cpus(). Otherwise,
- * pmap_update_pde_kernel() could access allpmaps while it is
- * being changed.
- */
- mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
/*
* Reserve some special page table entries/VA space for temporary
* mapping of pages.
@@ -474,14 +648,7 @@
SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)
for (i = 0; i < NKPT; i++)
- KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;
-
- /*
- * Adjust the start of the KPTD and KPTmap so that the implementation
- * of pmap_kextract() and pmap_growkernel() can be made simpler.
- */
- KPTD -= KPTDI;
- KPTmap -= i386_btop(KPTDI << PDRSHIFT);
+ KPTD[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V;
/*
* PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
@@ -494,18 +661,6 @@
virtual_avail = va;
- /*
- * Finish removing the identity mapping (virt == phys) of low memory.
- * It was only used for 2 instructions in locore. locore then
- * unmapped the first PTD to get some null pointer checks. ACPI
- * wakeup will map the first PTD transiently to use it for 1
- * instruction. The double mapping for low memory is not usable in
- * normal operation since it breaks trapping of null pointers and
- * causes inconsistencies in page tables when combined with PG_G.
- */
- for (i = 1; i < NKPT; i++)
- PTD[i] = 0;
-
/*
* Initialize the PAT MSR if present.
* pmap_init_pat() clears and sets CR4_PGE, which, as a
@@ -515,9 +670,6 @@
* comes with PAT. Both features were added for Pentium Pro.
*/
pmap_init_pat();
-
- /* Turn on PG_G on kernel page(s) */
- pmap_set_pg();
}
static void
@@ -529,21 +681,32 @@
CPU_FOREACH(i) {
pc = pcpu_find(i);
+ mtx_init(&pc->pc_copyout_mlock, "cpmlk", NULL, MTX_DEF |
+ MTX_NEW);
+ pc->pc_copyout_maddr = kva_alloc(ptoa(2));
+ if (pc->pc_copyout_maddr == 0)
+ panic("unable to allocate non-sleepable copyout KVA");
+ sx_init(&pc->pc_copyout_slock, "cpslk");
+ pc->pc_copyout_saddr = kva_alloc(ptoa(2));
+ if (pc->pc_copyout_saddr == 0)
+ panic("unable to allocate sleepable copyout KVA");
+
/*
- * Skip if the mapping has already been initialized,
+ * Skip if the mappings have already been initialized,
* i.e. this is the BSP.
*/
if (pc->pc_cmap_addr1 != 0)
continue;
+
mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
pages = kva_alloc(PAGE_SIZE * 3);
if (pages == 0)
- panic("%s: unable to allocate KVA", __func__);
+ panic("unable to allocate CMAP KVA");
pc->pc_cmap_pte1 = vtopte(pages);
pc->pc_cmap_pte2 = vtopte(pages + PAGE_SIZE);
pc->pc_cmap_addr1 = (caddr_t)pages;
pc->pc_cmap_addr2 = (caddr_t)(pages + PAGE_SIZE);
- pc->pc_qmap_addr = pages + (PAGE_SIZE * 2);
+ pc->pc_qmap_addr = pages + ptoa(2);
}
}
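
The per-CPU state allocated here backs the relocated user copy path set up by copyout_init_tramp(): with user and kernel no longer sharing an address space, user pages are temporarily mapped into per-CPU KVA windows. A hedged reading of the two window/lock pairs:

/*
 *   pc_copyout_maddr / pc_copyout_mlock: mutex-guarded window for
 *       contexts that may not sleep;
 *   pc_copyout_saddr / pc_copyout_slock: sx-guarded window for the
 *       ordinary, sleepable copyin()/copyout() path.
 *
 * Each window is two pages (kva_alloc(ptoa(2))) so one mapping can
 * cover a transfer that crosses a user page boundary.
 */
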
@@ -653,39 +816,6 @@
load_cr4(cr4);
}
-/*
- * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on.
- */
-static void
-pmap_set_pg(void)
-{
- pt_entry_t *pte;
- vm_offset_t va, endva;
-
- if (pgeflag == 0)
- return;
-
- endva = KERNBASE + KERNend;
-
- if (pseflag) {
- va = KERNBASE + roundup2(KERNLOAD, NBPDR);
- while (va < endva) {
- pdir_pde(PTD, va) |= pgeflag;
- invltlb(); /* Flush non-PG_G entries. */
- va += NBPDR;
- }
- } else {
- va = (vm_offset_t)btext;
- while (va < endva) {
- pte = vtopte(va);
- if (*pte)
- *pte |= pgeflag;
- invltlb(); /* Flush non-PG_G entries. */
- va += PAGE_SIZE;
- }
- }
-}
-
/*
* Initialize a vm_page's machine-dependent fields.
*/
@@ -783,12 +913,12 @@
* page table pages.
*/
for (i = 0; i < NKPT; i++) {
- mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
+ mpte = PHYS_TO_VM_PAGE(KPTphys + ptoa(i));
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_init: page table page is out of range"));
mpte->pindex = i + KPTDI;
- mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
+ mpte->phys_addr = KPTphys + ptoa(i);
}
/*
@@ -859,6 +989,8 @@
#endif
pmap_initialized = 1;
+ pmap_init_trm();
+
if (!bootverbose)
return;
for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
@@ -868,6 +1000,7 @@
printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i,
(uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode);
}
+
}
@@ -935,21 +1068,9 @@
pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
{
pd_entry_t *pde;
- pmap_t pmap;
- boolean_t PTD_updated;
-
- PTD_updated = FALSE;
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
- PG_FRAME))
- PTD_updated = TRUE;
- pde = pmap_pde(pmap, va);
- pde_store(pde, newpde);
- }
- mtx_unlock_spin(&allpmaps_lock);
- KASSERT(PTD_updated,
- ("pmap_kenter_pde: current page table is not in allpmaps"));
+
+ pde = pmap_pde(kernel_pmap, va);
+ pde_store(pde, newpde);
}
/*
@@ -962,47 +1083,23 @@
static void
pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
{
- u_long cr4;
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
invlpg(va);
- else if ((newpde & PG_G) == 0)
+ else /* if ((newpde & PG_G) == 0) */
/*
* Promotion: flush every 4KB page mapping from the TLB
* because there are too many to flush individually.
*/
invltlb();
- else {
- /*
- * Promotion: flush every 4KB page mapping from the TLB,
- * including any global (PG_G) mappings.
- */
- cr4 = rcr4();
- load_cr4(cr4 & ~CR4_PGE);
- /*
- * Although preemption at this point could be detrimental to
- * performance, it would not lead to an error. PG_G is simply
- * ignored if CR4.PGE is clear. Moreover, in case this block
- * is re-entered, the load_cr4() either above or below will
- * modify CR4.PGE flushing the TLB.
- */
- load_cr4(cr4 | CR4_PGE);
- }
}
void
invltlb_glob(void)
{
- uint64_t cr4;
- if (pgeflag == 0) {
- invltlb();
- } else {
- cr4 = rcr4();
- load_cr4(cr4 & ~CR4_PGE);
- load_cr4(cr4 | CR4_PGE);
- }
+ invltlb();
}
@@ -1033,15 +1130,15 @@
u_int cpuid;
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ if (pmap == kernel_pmap) {
invlpg(va);
mask = &all_cpus;
+ } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invlpg(va);
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
@@ -1065,17 +1162,16 @@
}
sched_pin();
- if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ if (pmap == kernel_pmap) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
mask = &all_cpus;
+ } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) {
+ mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
@@ -1091,17 +1187,14 @@
sched_pin();
if (pmap == kernel_pmap) {
- invltlb_glob();
+ invltlb();
mask = &all_cpus;
} else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) {
- invltlb();
mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (CPU_ISSET(cpuid, &pmap->pm_active))
- invltlb();
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
@@ -1132,19 +1225,10 @@
{
struct pde_action *act = arg;
pd_entry_t *pde;
- pmap_t pmap;
if (act->store == PCPU_GET(cpuid)) {
-
- /*
- * Elsewhere, this operation requires allpmaps_lock for
- * synchronization. Here, it does not because it is being
- * performed in the context of an all_cpus rendezvous.
- */
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- pde = pmap_pde(pmap, act->va);
- pde_store(pde, act->newpde);
- }
+ pde = pmap_pde(kernel_pmap, act->va);
+ pde_store(pde, act->newpde);
}
}
@@ -1219,7 +1303,7 @@
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ if (pmap == kernel_pmap)
invlpg(va);
}
@@ -1228,7 +1312,7 @@
{
vm_offset_t addr;
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ if (pmap == kernel_pmap)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
@@ -1238,8 +1322,6 @@
{
if (pmap == kernel_pmap)
- invltlb_glob();
- else if (!CPU_EMPTY(&pmap->pm_active))
invltlb();
}
@@ -1371,8 +1453,7 @@
pmap_is_current(pmap_t pmap)
{
- return (pmap == kernel_pmap || pmap ==
- vmspace_pmap(curthread->td_proc->p_vmspace));
+ return (pmap == kernel_pmap);
}
/*
@@ -1570,7 +1651,7 @@
pt_entry_t *pte;
pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | pgeflag);
+ pte_store(pte, pa | PG_RW | PG_V);
}
static __inline void
@@ -1579,7 +1660,7 @@
pt_entry_t *pte;
pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
+ pte_store(pte, pa | PG_RW | PG_V | pmap_cache_bits(mode, 0));
}
/*
@@ -1638,7 +1719,7 @@
pseflag) {
KASSERT((va & PDRMASK) == 0,
("pmap_map: misaligned va %#x", va));
- newpde = start | PG_PS | pgeflag | PG_RW | PG_V;
+ newpde = start | PG_PS | PG_RW | PG_V;
pmap_kenter_pde(va, newpde);
va += NBPDR;
start += NBPDR;
@@ -1678,9 +1759,9 @@
if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
oldpte |= *pte;
#if defined(PAE) || defined(PAE_TABLES)
- pte_store(pte, pa | pgeflag | pg_nx | PG_RW | PG_V);
+ pte_store(pte, pa | pg_nx | PG_RW | PG_V);
#else
- pte_store(pte, pa | pgeflag | PG_RW | PG_V);
+ pte_store(pte, pa | PG_RW | PG_V);
#endif
}
pte++;
@@ -1809,7 +1890,7 @@
pd_entry_t ptepde;
vm_page_t mpte;
- if (va >= VM_MAXUSER_ADDRESS)
+ if (pmap == kernel_pmap)
return (0);
ptepde = *pmap_pde(pmap, va);
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
@@ -1824,14 +1905,9 @@
{
PMAP_LOCK_INIT(pmap);
- /*
- * Since the page table directory is shared with the kernel pmap,
- * which is already included in the list "allpmaps", this pmap does
- * not need to be inserted into that list.
- */
- pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
+ pmap->pm_pdir = IdlePTD;
#if defined(PAE) || defined(PAE_TABLES)
- pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
+ pmap->pm_pdpt = IdlePDPT;
#endif
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
@@ -1847,8 +1923,7 @@
int
pmap_pinit(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
- vm_paddr_t pa;
+ vm_page_t m;
int i;
/*
@@ -1878,32 +1953,25 @@
for (i = 0; i < NPGPTD;) {
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO);
- if (m == NULL)
+ if (m == NULL) {
vm_wait(NULL);
- else
- ptdpg[i++] = m;
+ } else {
+ pmap->pm_ptdpg[i] = m;
+#if defined(PAE) || defined(PAE_TABLES)
+ pmap->pm_pdpt[i] = VM_PAGE_TO_PHYS(m) | PG_V;
+#endif
+ i++;
+ }
}
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
+ pmap_qenter((vm_offset_t)pmap->pm_pdir, pmap->pm_ptdpg, NPGPTD);
for (i = 0; i < NPGPTD; i++)
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
+ if ((pmap->pm_ptdpg[i]->flags & PG_ZERO) == 0)
pagezero(pmap->pm_pdir + (i * NPDEPG));
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- /* Copy the kernel page table directory entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
- mtx_unlock_spin(&allpmaps_lock);
-
- /* install self-referential address mapping entry(s) */
- for (i = 0; i < NPGPTD; i++) {
- pa = VM_PAGE_TO_PHYS(ptdpg[i]);
- pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
-#if defined(PAE) || defined(PAE_TABLES)
- pmap->pm_pdpt[i] = pa | PG_V;
-#endif
- }
+ /* Install the trampoline mapping. */
+ pmap->pm_pdir[TRPTDI] = PTD[TRPTDI];
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -2016,7 +2084,7 @@
void
pmap_release(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
+ vm_page_t m;
int i;
KASSERT(pmap->pm_stats.resident_count == 0,
@@ -2027,27 +2095,16 @@
KASSERT(CPU_EMPTY(&pmap->pm_active),
("releasing active pmap %p", pmap));
- mtx_lock_spin(&allpmaps_lock);
- LIST_REMOVE(pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
-
- for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
- PG_FRAME);
-
- bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
- sizeof(*pmap->pm_pdir));
-
pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
for (i = 0; i < NPGPTD; i++) {
- m = ptdpg[i];
+ m = pmap->pm_ptdpg[i];
#if defined(PAE) || defined(PAE_TABLES)
KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
("pmap_release: got wrong ptd page"));
#endif
vm_page_unwire_noq(m);
- vm_page_free_zero(m);
+ vm_page_free(m);
}
}
@@ -2107,7 +2164,7 @@
pmap_zero_page(nkpg);
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
+ pdir_pde(KPTD, kernel_vm_end) = newpdir;
pmap_kenter_pde(kernel_vm_end, newpdir);
kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
@@ -2665,7 +2722,7 @@
" in pmap %p", va, pmap);
return (FALSE);
}
- if (va < VM_MAXUSER_ADDRESS)
+ if (pmap != kernel_pmap)
pmap->pm_stats.resident_count++;
}
mptepa = VM_PAGE_TO_PHYS(mpte);
@@ -2676,7 +2733,7 @@
* temporarily map the page table page (mpte) into the kernel's
* address space at either PADDR1 or PADDR2.
*/
- if (va >= KERNBASE)
+ if (pmap == kernel_pmap)
firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
if ((*PMAP1 & PG_FRAME) != mptepa) {
@@ -3471,9 +3528,11 @@
mpte = NULL;
wired = (flags & PMAP_ENTER_WIRED) != 0;
- KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
- KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
- ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
+ KASSERT((pmap == kernel_pmap && va < VM_MAX_KERNEL_ADDRESS) ||
+ (pmap != kernel_pmap && va < VM_MAXUSER_ADDRESS),
+ ("pmap_enter: toobig k%d %#x", pmap == kernel_pmap, va));
+ KASSERT(va < PMAP_TRM_MIN_ADDRESS,
+ ("pmap_enter: invalid to pmap_enter into trampoline (va: 0x%x)",
va));
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
@@ -3483,7 +3542,7 @@
sched_pin();
pde = pmap_pde(pmap, va);
- if (va < VM_MAXUSER_ADDRESS) {
+ if (pmap != kernel_pmap) {
/*
* va is for UVA.
* In the case that a page table page is not resident,
@@ -3582,7 +3641,8 @@
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0) {
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
+ KASSERT(pmap != kernel_pmap || va < kmi.clean_sva ||
+ va >= kmi.clean_eva,
("pmap_enter: managed mapping within the clean submap"));
if (pv == NULL)
pv = get_pv_entry(pmap, FALSE);
@@ -3614,10 +3674,8 @@
#endif
if (wired)
newpte |= PG_W;
- if (va < VM_MAXUSER_ADDRESS)
+ if (pmap != kernel_pmap)
newpte |= PG_U;
- if (pmap == kernel_pmap)
- newpte |= pgeflag;
/*
* if the mapping or permission bits are different, we need
@@ -3802,8 +3860,8 @@
vm_paddr_t pa;
struct spglist free;
- KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
- (m->oflags & VPO_UNMANAGED) != 0,
+ KASSERT(pmap != kernel_pmap || va < kmi.clean_sva ||
+ va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -3812,7 +3870,7 @@
* In the case that a page table page is not
* resident, we are creating it here.
*/
- if (va < VM_MAXUSER_ADDRESS) {
+ if (pmap != kernel_pmap) {
u_int ptepindex;
pd_entry_t ptepa;
@@ -3848,18 +3906,14 @@
mpte = NULL;
}
- /*
- * This call to vtopte makes the assumption that we are
- * entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
- * But that isn't as quick as vtopte.
- */
- pte = vtopte(va);
+ /* XXXKIB: pmap_pte_quick() instead ? */
+ pte = pmap_pte(pmap, va);
if (*pte) {
if (mpte != NULL) {
mpte->wire_count--;
mpte = NULL;
}
+ pmap_pte_release(pte);
return (mpte);
}
@@ -3877,6 +3931,7 @@
mpte = NULL;
}
+ pmap_pte_release(pte);
return (mpte);
}
@@ -3898,6 +3953,7 @@
pte_store(pte, pa | PG_V | PG_U);
else
pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+ pmap_pte_release(pte);
return (mpte);
}
@@ -4089,122 +4145,18 @@
* from the source map to the range dst_addr/len
* in the destination map.
*
- * This routine is only advisory and need not do anything.
+ * This routine is only advisory and need not do anything. Since
+ * the current pmap is always the kernel pmap when executing in
+ * the kernel, and we do not copy from the kernel pmap to a user
+ * pmap, this optimization is not usable in the 4/4G full-split
+ * i386 world.
*/
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
- struct spglist free;
- vm_offset_t addr;
- vm_offset_t end_addr = src_addr + len;
- vm_offset_t pdnxt;
-
- if (dst_addr != src_addr)
- return;
-
- if (!pmap_is_current(src_pmap))
- return;
-
- rw_wlock(&pvh_global_lock);
- if (dst_pmap < src_pmap) {
- PMAP_LOCK(dst_pmap);
- PMAP_LOCK(src_pmap);
- } else {
- PMAP_LOCK(src_pmap);
- PMAP_LOCK(dst_pmap);
- }
- sched_pin();
- for (addr = src_addr; addr < end_addr; addr = pdnxt) {
- pt_entry_t *src_pte, *dst_pte;
- vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- u_int ptepindex;
-
- KASSERT(addr < UPT_MIN_ADDRESS,
- ("pmap_copy: invalid to pmap_copy page tables"));
-
- pdnxt = (addr + NBPDR) & ~PDRMASK;
- if (pdnxt < addr)
- pdnxt = end_addr;
- ptepindex = addr >> PDRSHIFT;
-
- srcptepaddr = src_pmap->pm_pdir[ptepindex];
- if (srcptepaddr == 0)
- continue;
-
- if (srcptepaddr & PG_PS) {
- if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr)
- continue;
- if (dst_pmap->pm_pdir[ptepindex] == 0 &&
- ((srcptepaddr & PG_MANAGED) == 0 ||
- pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
- PG_PS_FRAME))) {
- dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
- ~PG_W;
- dst_pmap->pm_stats.resident_count +=
- NBPDR / PAGE_SIZE;
- pmap_pde_mappings++;
- }
- continue;
- }
-
- srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
- KASSERT(srcmpte->wire_count > 0,
- ("pmap_copy: source page table page is unused"));
-
- if (pdnxt > end_addr)
- pdnxt = end_addr;
-
- src_pte = vtopte(addr);
- while (addr < pdnxt) {
- pt_entry_t ptetemp;
- ptetemp = *src_pte;
- /*
- * we only virtual copy managed pages
- */
- if ((ptetemp & PG_MANAGED) != 0) {
- dstmpte = pmap_allocpte(dst_pmap, addr,
- PMAP_ENTER_NOSLEEP);
- if (dstmpte == NULL)
- goto out;
- dst_pte = pmap_pte_quick(dst_pmap, addr);
- if (*dst_pte == 0 &&
- pmap_try_insert_pv_entry(dst_pmap, addr,
- PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
- /*
- * Clear the wired, modified, and
- * accessed (referenced) bits
- * during the copy.
- */
- *dst_pte = ptetemp & ~(PG_W | PG_M |
- PG_A);
- dst_pmap->pm_stats.resident_count++;
- } else {
- SLIST_INIT(&free);
- if (pmap_unwire_ptp(dst_pmap, dstmpte,
- &free)) {
- pmap_invalidate_page(dst_pmap,
- addr);
- vm_page_free_pages_toq(&free,
- true);
- }
- goto out;
- }
- if (dstmpte->wire_count >= srcmpte->wire_count)
- break;
- }
- addr += PAGE_SIZE;
- src_pte++;
- }
- }
-out:
- sched_unpin();
- rw_wunlock(&pvh_global_lock);
- PMAP_UNLOCK(src_pmap);
- PMAP_UNLOCK(dst_pmap);
-}
+}
/*
* Zero 1 page of virtual memory mapped from a hardware page by the caller.
@@ -4519,7 +4471,7 @@
pte = pmap_pde(pmap, pv->pv_va);
tpte = *pte;
if ((tpte & PG_PS) == 0) {
- pte = vtopte(pv->pv_va);
+ pte = pmap_pte_quick(pmap, pv->pv_va);
tpte = *pte & ~PG_PTE_PAT;
}
@@ -4685,8 +4637,10 @@
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, addr);
if (*pde != 0 && (*pde & PG_PS) == 0) {
- pte = vtopte(addr);
- rv = *pte == 0;
+ pte = pmap_pte(pmap, addr);
+ if (pte != NULL)
+ rv = *pte == 0;
+ pmap_pte_release(pte);
}
PMAP_UNLOCK(pmap);
return (rv);
@@ -5188,8 +5142,8 @@
size = round_page(offset + size);
pa = pa & PG_FRAME;
- if (pa < KERNLOAD && pa + size <= KERNLOAD)
- va = KERNBASE + pa;
+ if (pa < PMAP_MAP_LOW && pa + size <= PMAP_MAP_LOW)
+ va = pa + PMAP_MAP_LOW;
else if (!pmap_initialized) {
va = 0;
for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
@@ -5248,7 +5202,7 @@
vm_offset_t offset;
int i;
- if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
+ if (va >= PMAP_MAP_LOW && va <= KERNBASE && va + size <= KERNBASE)
return;
offset = va & PAGE_MASK;
size = round_page(offset + size);
@@ -5545,7 +5499,6 @@
* pmap_activate is for the current thread on the current cpu
*/
td->td_pcb->pcb_cr3 = cr3;
- load_cr3(cr3);
PCPU_SET(curpmap, pmap);
critical_exit();
}
@@ -5613,6 +5566,80 @@
critical_exit();
}
+static vmem_t *pmap_trm_arena;
+static vmem_addr_t pmap_trm_arena_last = PMAP_TRM_MIN_ADDRESS;
+static int trm_guard = PAGE_SIZE;
+
+static int
+pmap_trm_import(void *unused __unused, vmem_size_t size, int flags,
+ vmem_addr_t *addrp)
+{
+ vm_page_t m;
+ vmem_addr_t af, addr, prev_addr;
+ pt_entry_t *trm_pte;
+
+ prev_addr = atomic_load_long(&pmap_trm_arena_last);
+ size = round_page(size) + trm_guard;
+ for (;;) {
+ if (prev_addr + size < prev_addr || prev_addr + size < size ||
+ prev_addr + size > PMAP_TRM_MAX_ADDRESS)
+ return (ENOMEM);
+ addr = prev_addr + size;
+ if (atomic_fcmpset_int(&pmap_trm_arena_last, &prev_addr, addr))
+ break;
+ }
+ prev_addr += trm_guard;
+ trm_pte = PTmap + atop(prev_addr);
+ for (af = prev_addr; af < addr; af += PAGE_SIZE) {
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NOBUSY |
+ VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
+ pte_store(&trm_pte[atop(af - prev_addr)], VM_PAGE_TO_PHYS(m) |
+ PG_M | PG_A | PG_RW | PG_V | pgeflag |
+ pmap_cache_bits(VM_MEMATTR_DEFAULT, FALSE));
+ }
+ *addrp = prev_addr;
+ return (0);
+}
+
+static void
+pmap_init_trm(void)
+{
+ vm_page_t pd_m;
+
+ TUNABLE_INT_FETCH("machdep.trm_guard", &trm_guard);
+ if ((trm_guard & PAGE_MASK) != 0)
+ trm_guard = 0;
+ pmap_trm_arena = vmem_create("i386trampoline", 0, 0, 1, 0, M_WAITOK);
+ vmem_set_import(pmap_trm_arena, pmap_trm_import, NULL, NULL, PAGE_SIZE);
+ pd_m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NOBUSY |
+ VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_WAITOK | VM_ALLOC_ZERO);
+ if ((pd_m->flags & PG_ZERO) == 0)
+ pmap_zero_page(pd_m);
+ PTD[TRPTDI] = VM_PAGE_TO_PHYS(pd_m) | PG_M | PG_A | PG_RW | PG_V |
+ pmap_cache_bits(VM_MEMATTR_DEFAULT, TRUE);
+}
+
+void *
+pmap_trm_alloc(size_t size, int flags)
+{
+ vmem_addr_t res;
+ int error;
+
+ MPASS((flags & ~(M_WAITOK | M_NOWAIT | M_ZERO)) == 0);
+ error = vmem_xalloc(pmap_trm_arena, roundup2(size, 4), sizeof(int),
+ 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags | M_FIRSTFIT, &res);
+ if (error != 0)
+ return (NULL);
+ /* The import path does not zero pages, so honor M_ZERO here. */
+ if ((flags & M_ZERO) != 0)
+ bzero((void *)res, size);
+ return ((void *)res);
+}
+
+void
+pmap_trm_free(void *addr, size_t size)
+{
+
+ vmem_free(pmap_trm_arena, (uintptr_t)addr, roundup2(size, 4));
+}
+
#if defined(PMAP_DEBUG)
pmap_pid_dump(int pid)
{
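[A minimal usage sketch of the trampoline arena added above; this mirrors what the sys_machdep.c hunks below do for LDTs and TSS extensions, which must stay mapped in every pmap via the shared TRPTDI slot:]

	struct pcb_ext *ext;

	ext = pmap_trm_alloc(ctob(IOPAGES + 1), M_WAITOK | M_ZERO);
	/* ... object is visible in both user and kernel page tables ... */
	pmap_trm_free(ext, ctob(IOPAGES + 1));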
Index: sys/i386/i386/sigtramp.s
===================================================================
--- sys/i386/i386/sigtramp.s
+++ sys/i386/i386/sigtramp.s
@@ -95,6 +95,25 @@
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */
0: jmp 0b
+
+/*
+ * Our lcall $7,$0 handler remains in user mode (ring 3), since lcalls
+ * don't change the interrupt mask, so if this one went directly to the
+ * kernel then there would be a window with interrupts enabled in kernel
+ * mode, and all interrupt handlers would have to be almost as complicated
+ * as the NMI handler to support this.
+ *
+ * Instead, convert the lcall to an int $0x80 call. The kernel does most
+ * of the conversion by popping the lcall return values off the user
+ * stack and returning to them instead of to here, except when the
+ * conversion itself fails. Adjusting the stack here is impossible for
+ * vfork() and harder for other syscalls.
+ */
+ ALIGN_TEXT
+lcall_tramp:
+ int $0x80
+1: jmp 1b
+
#endif /* COMPAT_43 */
ALIGN_TEXT
@@ -113,4 +132,7 @@
.globl szosigcode
szosigcode:
.long esigcode-osigcode
+ .globl sz_lcall_tramp
+sz_lcall_tramp:
+ .long esigcode-lcall_tramp
#endif
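[For reference, a sketch of the user stack frame that lcall $7,$0 leaves behind on entry to lcall_tramp; the syscall() hunk in trap.c below pops exactly these two words, hence the frame->tf_esp += 2 * sizeof(u_int32_t) adjustment there:]

	/* layout at the user %esp inside lcall_tramp (sketch) */
	struct lcall_ret_frame {
		uint32_t eip;	/* return offset pushed by lcall */
		uint32_t cs;	/* return selector; low 16 bits valid */
	};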
Index: sys/i386/i386/support.s
===================================================================
--- sys/i386/i386/support.s
+++ sys/i386/i386/support.s
@@ -251,196 +251,6 @@
ret
END(memcpy)
-/*****************************************************************************/
-/* copyout and fubyte family */
-/*****************************************************************************/
-/*
- * Access user memory from inside the kernel. These routines and possibly
- * the math- and DOS emulators should be the only places that do this.
- *
- * We have to access the memory with user's permissions, so use a segment
- * selector with RPL 3. For writes to user space we have to additionally
- * check the PTE for write permission, because the 386 does not check
- * write permissions when we are executing with EPL 0. The 486 does check
- * this if the WP bit is set in CR0, so we can use a simpler version here.
- *
- * These routines set curpcb->pcb_onfault for the time they execute. When a
- * protection violation occurs inside the functions, the trap handler
- * returns to *curpcb->pcb_onfault instead of the function.
- */
-
-/*
- * copyout(from_kernel, to_user, len) - MP SAFE
- */
-ENTRY(copyout)
- movl PCPU(CURPCB),%eax
- movl $copyout_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- pushl %ebx
- movl 16(%esp),%esi
- movl 20(%esp),%edi
- movl 24(%esp),%ebx
- testl %ebx,%ebx /* anything to do? */
- jz done_copyout
-
- /*
- * Check explicitly for non-user addresses. This check is essential
- * because it prevents usermode from writing into the kernel. We do
- * not verify anywhere else that the user did not specify a rogue
- * address.
- */
- /*
- * First, prevent address wrapping.
- */
- movl %edi,%eax
- addl %ebx,%eax
- jc copyout_fault
-/*
- * XXX STOP USING VM_MAXUSER_ADDRESS.
- * It is an end address, not a max, so every time it is used correctly it
- * looks like there is an off by one error, and of course it caused an off
- * by one error in several places.
- */
- cmpl $VM_MAXUSER_ADDRESS,%eax
- ja copyout_fault
-
- /* bcopy(%esi, %edi, %ebx) */
- movl %ebx,%ecx
-
- shrl $2,%ecx
- rep
- movsl
- movb %bl,%cl
- andb $3,%cl
- rep
- movsb
-
-done_copyout:
- popl %ebx
- popl %edi
- popl %esi
- xorl %eax,%eax
- movl PCPU(CURPCB),%edx
- movl %eax,PCB_ONFAULT(%edx)
- ret
-END(copyout)
-
- ALIGN_TEXT
-copyout_fault:
- popl %ebx
- popl %edi
- popl %esi
- movl PCPU(CURPCB),%edx
- movl $0,PCB_ONFAULT(%edx)
- movl $EFAULT,%eax
- ret
-
-/*
- * copyin(from_user, to_kernel, len) - MP SAFE
- */
-ENTRY(copyin)
- movl PCPU(CURPCB),%eax
- movl $copyin_fault,PCB_ONFAULT(%eax)
- pushl %esi
- pushl %edi
- movl 12(%esp),%esi /* caddr_t from */
- movl 16(%esp),%edi /* caddr_t to */
- movl 20(%esp),%ecx /* size_t len */
-
- /*
- * make sure address is valid
- */
- movl %esi,%edx
- addl %ecx,%edx
- jc copyin_fault
- cmpl $VM_MAXUSER_ADDRESS,%edx
- ja copyin_fault
-
- movb %cl,%al
- shrl $2,%ecx /* copy longword-wise */
- rep
- movsl
- movb %al,%cl
- andb $3,%cl /* copy remaining bytes */
- rep
- movsb
-
- popl %edi
- popl %esi
- xorl %eax,%eax
- movl PCPU(CURPCB),%edx
- movl %eax,PCB_ONFAULT(%edx)
- ret
-END(copyin)
-
- ALIGN_TEXT
-copyin_fault:
- popl %edi
- popl %esi
- movl PCPU(CURPCB),%edx
- movl $0,PCB_ONFAULT(%edx)
- movl $EFAULT,%eax
- ret
-
-/*
- * casueword. Compare and set user word. Returns -1 on fault,
- * 0 on non-faulting access. The current value is in *oldp.
- */
-ALTENTRY(casueword32)
-ENTRY(casueword)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx /* dst */
- movl 8(%esp),%eax /* old */
- movl 16(%esp),%ecx /* new */
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
- ja fusufault
-
-#ifdef SMP
- lock
-#endif
- cmpxchgl %ecx,(%edx) /* Compare and set. */
-
- /*
- * The old value is in %eax. If the store succeeded it will be the
- * value we expected (old) from before the store, otherwise it will
- * be the current value.
- */
-
- movl PCPU(CURPCB),%ecx
- movl $0,PCB_ONFAULT(%ecx)
- movl 12(%esp),%edx /* oldp */
- movl %eax,(%edx)
- xorl %eax,%eax
- ret
-END(casueword32)
-END(casueword)
-
-/*
- * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
- * memory.
- */
-
-ALTENTRY(fueword32)
-ENTRY(fueword)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx /* from */
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
- ja fusufault
-
- movl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- movl 8(%esp),%edx
- movl %eax,(%edx)
- xorl %eax,%eax
- ret
-END(fueword32)
-END(fueword)
-
/*
* fuswintr() and suswintr() are specialized variants of fuword16() and
* suword16(), respectively. They are called from the profiling code,
@@ -455,167 +265,6 @@
END(suswintr)
END(fuswintr)
-ENTRY(fuword16)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-2,%edx
- ja fusufault
-
- movzwl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-END(fuword16)
-
-ENTRY(fubyte)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-1,%edx
- ja fusufault
-
- movzbl (%edx),%eax
- movl $0,PCB_ONFAULT(%ecx)
- ret
-END(fubyte)
-
- ALIGN_TEXT
-fusufault:
- movl PCPU(CURPCB),%ecx
- xorl %eax,%eax
- movl %eax,PCB_ONFAULT(%ecx)
- decl %eax
- ret
-
-/*
- * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
- * All these functions are MPSAFE.
- */
-
-ALTENTRY(suword32)
-ENTRY(suword)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
- ja fusufault
-
- movl 8(%esp),%eax
- movl %eax,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-END(suword32)
-END(suword)
-
-ENTRY(suword16)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */
- ja fusufault
-
- movw 8(%esp),%ax
- movw %ax,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx /* restore trashed register */
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-END(suword16)
-
-ENTRY(subyte)
- movl PCPU(CURPCB),%ecx
- movl $fusufault,PCB_ONFAULT(%ecx)
- movl 4(%esp),%edx
-
- cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */
- ja fusufault
-
- movb 8(%esp),%al
- movb %al,(%edx)
- xorl %eax,%eax
- movl PCPU(CURPCB),%ecx /* restore trashed register */
- movl %eax,PCB_ONFAULT(%ecx)
- ret
-END(subyte)
-
-/*
- * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
- *
- * copy a string from 'from' to 'to', stop when a 0 character is reached.
- * return ENAMETOOLONG if string is longer than maxlen, and
- * EFAULT on protection violations. If lencopied is non-zero,
- * return the actual length in *lencopied.
- */
-ENTRY(copyinstr)
- pushl %esi
- pushl %edi
- movl PCPU(CURPCB),%ecx
- movl $cpystrflt,PCB_ONFAULT(%ecx)
-
- movl 12(%esp),%esi /* %esi = from */
- movl 16(%esp),%edi /* %edi = to */
- movl 20(%esp),%edx /* %edx = maxlen */
-
- movl $VM_MAXUSER_ADDRESS,%eax
-
- /* make sure 'from' is within bounds */
- subl %esi,%eax
- jbe cpystrflt
-
- /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
- cmpl %edx,%eax
- jae 1f
- movl %eax,%edx
- movl %eax,20(%esp)
-1:
- incl %edx
-
-2:
- decl %edx
- jz 3f
-
- lodsb
- stosb
- orb %al,%al
- jnz 2b
-
- /* Success -- 0 byte reached */
- decl %edx
- xorl %eax,%eax
- jmp cpystrflt_x
-3:
- /* edx is zero - return ENAMETOOLONG or EFAULT */
- cmpl $VM_MAXUSER_ADDRESS,%esi
- jae cpystrflt
-4:
- movl $ENAMETOOLONG,%eax
- jmp cpystrflt_x
-
-cpystrflt:
- movl $EFAULT,%eax
-
-cpystrflt_x:
- /* set *lencopied and return %eax */
- movl PCPU(CURPCB),%ecx
- movl $0,PCB_ONFAULT(%ecx)
- movl 20(%esp),%ecx
- subl %edx,%ecx
- movl 24(%esp),%edx
- testl %edx,%edx
- jz 1f
- movl %ecx,(%edx)
-1:
- popl %edi
- popl %esi
- ret
-END(copyinstr)
-
/*
* copystr(from, to, maxlen, int *lencopied) - MP SAFE
*/
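[The routines removed above are re-implemented in C elsewhere in this change (the new i386/i386/copyout.c), since with the 4/4 split user pages are no longer reachable by a direct kernel-mode access. A hedged sketch of the pcb_onfault recovery pattern inherited from the deleted assembly; copy_generic and copy_fault_handler are illustrative names, not from the patch, and the real code copies through a per-CPU mapping window rather than directly:]

	int
	copy_generic(const void *kaddr, void *uaddr, size_t len)
	{
		struct pcb *pcb;

		/* Reject wrap-around and non-user targets up front. */
		if ((uintptr_t)uaddr + len < (uintptr_t)uaddr ||
		    (uintptr_t)uaddr + len > VM_MAXUSER_ADDRESS)
			return (EFAULT);
		pcb = PCPU_GET(curpcb);
		pcb->pcb_onfault = (caddr_t)copy_fault_handler;
		bcopy(kaddr, uaddr, len);	/* a #PF resumes at pcb_onfault */
		pcb->pcb_onfault = NULL;
		return (0);
	}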
Index: sys/i386/i386/swtch.s
===================================================================
--- sys/i386/i386/swtch.s
+++ sys/i386/i386/swtch.s
@@ -86,8 +86,6 @@
1:
movl 8(%esp),%ecx /* New thread */
movl TD_PCB(%ecx),%edx
- movl PCB_CR3(%edx),%eax
- movl %eax,%cr3
/* set bit in new pm_active */
movl TD_PROC(%ecx),%eax
movl P_VMSPACE(%eax), %ebx
@@ -157,7 +155,7 @@
popl %eax
1:
- /* Save is done. Now fire up new thread. Leave old vmspace. */
+ /* Save is done. Now fire up new thread. */
movl 4(%esp),%edi
movl 8(%esp),%ecx /* New thread */
movl 12(%esp),%esi /* New lock */
@@ -167,15 +165,10 @@
#endif
movl TD_PCB(%ecx),%edx
- /* switch address space */
- movl PCB_CR3(%edx),%eax
- movl %cr3,%ebx /* The same address space? */
- cmpl %ebx,%eax
- je sw0
- movl %eax,%cr3 /* new address space */
+ /* Switchout td_lock */
movl %esi,%eax
movl PCPU(CPUID),%esi
- SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */
+ SETOP %eax,TD_LOCK(%edi)
/* Release bit from old pmap->pm_active */
movl PCPU(CURPMAP), %ebx
@@ -200,26 +193,28 @@
sw1:
BLOCK_SPIN(%ecx)
/*
- * At this point, we've switched address spaces and are ready
+ * At this point, we have managed thread locks and are ready
* to load up the rest of the next context.
*/
+
+ /* Load a pointer to the thread kernel stack into PCPU. */
+ leal -VM86_STACK_SPACE(%edx), %eax /* leave space for vm86 */
+ movl %eax, PCPU(KESP0)
+
cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
je 1f /* If not, use the default */
movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */
movl PCB_EXT(%edx), %edi /* new tss descriptor */
+ movl PCPU(TRAMPSTK), %ebx
+ movl %ebx, PCB_EXT_TSS+TSS_ESP0(%edi)
jmp 2f /* Load it up */
1: /*
* Use the common default TSS instead of our own.
- * Set our stack pointer into the TSS, it's set to just
- * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16;
- */
- leal -16(%edx), %ebx /* leave space for vm86 */
- movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0
-
- /*
- * Test this CPU's bit in the bitmap to see if this
- * CPU was using a private TSS.
+ * The stack pointer in the common TSS already points to the
+ * trampoline stack and must not be changed.
+ *
+ * Test this CPU's flag to see if this CPU was using a private TSS.
*/
cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */
je 3f /* if so, skip reloading */
Index: sys/i386/i386/sys_machdep.c
===================================================================
--- sys/i386/i386/sys_machdep.c
+++ sys/i386/i386/sys_machdep.c
@@ -294,10 +294,8 @@
0 /* granularity */
};
- ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
- M_WAITOK | M_ZERO);
+ ext = pmap_trm_alloc(ctob(IOPAGES + 1), M_WAITOK | M_ZERO);
/* -16 is so we can convert a trapframe into vm86trapframe inplace */
- ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16;
ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
/*
* The last byte of the i/o map must be followed by an 0xff byte.
@@ -323,6 +321,7 @@
/* Switch to the new TSS. */
critical_enter();
+ ext->ext_tss.tss_esp0 = PCPU_GET(trampstk);
td->td_pcb->pcb_ext = ext;
PCPU_SET(private_tss, 1);
*PCPU_GET(tss_gdt) = ext->ext_tssd;
@@ -457,8 +456,8 @@
new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
new_ldt->ldt_len = len = NEW_MAX_LD(len);
- new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
- len * sizeof(union descriptor), M_WAITOK | M_ZERO);
+ new_ldt->ldt_base = pmap_trm_alloc(len * sizeof(union descriptor),
+ M_WAITOK | M_ZERO);
new_ldt->ldt_refcnt = 1;
new_ldt->ldt_active = 0;
@@ -473,7 +472,7 @@
bcopy(pldt->ldt_base, new_ldt->ldt_base,
len * sizeof(union descriptor));
} else
- bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
+ bcopy(ldt, new_ldt->ldt_base, sizeof(union descriptor) * NLDT);
return (new_ldt);
}
@@ -510,8 +509,8 @@
mtx_assert(&dt_lock, MA_OWNED);
if (--pldt->ldt_refcnt == 0) {
mtx_unlock_spin(&dt_lock);
- kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
- pldt->ldt_len * sizeof(union descriptor));
+ pmap_trm_free(pldt->ldt_base, pldt->ldt_len *
+ sizeof(union descriptor));
free(pldt, M_SUBPROC);
} else
mtx_unlock_spin(&dt_lock);
@@ -767,8 +766,7 @@
* free the new object and return.
*/
mtx_unlock_spin(&dt_lock);
- kmem_free(kernel_arena,
- (vm_offset_t)new_ldt->ldt_base,
+ pmap_trm_free(new_ldt->ldt_base,
new_ldt->ldt_len * sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
mtx_lock_spin(&dt_lock);
@@ -801,8 +799,8 @@
mtx_unlock_spin(&dt_lock);
#endif
if (old_ldt_base != NULL_LDT_BASE) {
- kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
- old_ldt_len * sizeof(union descriptor));
+ pmap_trm_free(old_ldt_base, old_ldt_len *
+ sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
}
mtx_lock_spin(&dt_lock);
Index: sys/i386/i386/trap.c
===================================================================
--- sys/i386/i386/trap.c
+++ sys/i386/i386/trap.c
@@ -47,6 +47,7 @@
*/
#include "opt_clock.h"
+#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
@@ -117,45 +118,60 @@
static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(void);
-extern inthand_t IDTVEC(lcall_syscall);
-
#define MAX_TRAP_MSG 32
-static char *trap_msg[] = {
- "", /* 0 unused */
- "privileged instruction fault", /* 1 T_PRIVINFLT */
- "", /* 2 unused */
- "breakpoint instruction fault", /* 3 T_BPTFLT */
- "", /* 4 unused */
- "", /* 5 unused */
- "arithmetic trap", /* 6 T_ARITHTRAP */
- "", /* 7 unused */
- "", /* 8 unused */
- "general protection fault", /* 9 T_PROTFLT */
- "trace trap", /* 10 T_TRCTRAP */
- "", /* 11 unused */
- "page fault", /* 12 T_PAGEFLT */
- "", /* 13 unused */
- "alignment fault", /* 14 T_ALIGNFLT */
- "", /* 15 unused */
- "", /* 16 unused */
- "", /* 17 unused */
- "integer divide fault", /* 18 T_DIVIDE */
- "non-maskable interrupt trap", /* 19 T_NMI */
- "overflow trap", /* 20 T_OFLOW */
- "FPU bounds check fault", /* 21 T_BOUND */
- "FPU device not available", /* 22 T_DNA */
- "double fault", /* 23 T_DOUBLEFLT */
- "FPU operand fetch fault", /* 24 T_FPOPFLT */
- "invalid TSS fault", /* 25 T_TSSFLT */
- "segment not present fault", /* 26 T_SEGNPFLT */
- "stack fault", /* 27 T_STKFLT */
- "machine check trap", /* 28 T_MCHK */
- "SIMD floating-point exception", /* 29 T_XMMFLT */
- "reserved (unknown) fault", /* 30 T_RESERVED */
- "", /* 31 unused (reserved) */
- "DTrace pid return trap", /* 32 T_DTRACE_RET */
+
+struct trap_data {
+ bool ei;
+ const char *msg;
+};
+
+static const struct trap_data trap_data[] = {
+ [T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" },
+ [T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" },
+ [T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" },
+ [T_PROTFLT] = { .ei = true, .msg = "general protection fault" },
+ [T_TRCTRAP] = { .ei = false, .msg = "trace trap" },
+ [T_PAGEFLT] = { .ei = true, .msg = "page fault" },
+ [T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" },
+ [T_DIVIDE] = { .ei = true, .msg = "integer divide fault" },
+ [T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" },
+ [T_OFLOW] = { .ei = true, .msg = "overflow trap" },
+ [T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" },
+ [T_DNA] = { .ei = true, .msg = "FPU device not available" },
+ [T_DOUBLEFLT] = { .ei = false, .msg = "double fault" },
+ [T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" },
+ [T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" },
+ [T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" },
+ [T_STKFLT] = { .ei = true, .msg = "stack fault" },
+ [T_MCHK] = { .ei = true, .msg = "machine check trap" },
+ [T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" },
+ [T_DTRACE_RET] = { .ei = true, .msg = "DTrace pid return trap" },
};
+static bool
+trap_enable_intr(int trapno)
+{
+
+ MPASS(trapno > 0);
+ if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL)
+ return (trap_data[trapno].ei);
+ return (false);
+}
+
+static const char *
+trap_msg(int trapno)
+{
+ const char *res;
+ static const char unkn[] = "UNKNOWN";
+
+ res = NULL;
+ if (trapno < nitems(trap_data))
+ res = trap_data[trapno].msg;
+ if (res == NULL)
+ res = unkn;
+ return (res);
+}
+
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
@@ -201,6 +217,9 @@
VM_CNT_INC(v_trap);
type = frame->tf_trapno;
+ KASSERT((read_eflags() & PSL_I) == 0,
+ ("trap: interrupts enaabled, type %d frame %p", type, frame));
+
#ifdef SMP
/* Handler for NMI IPIs used for stopping CPUs. */
if (type == T_NMI && ipi_nmi_handler() == 0)
@@ -257,53 +276,34 @@
return;
#endif
- if ((frame->tf_eflags & PSL_I) == 0) {
- /*
- * Buggy application or kernel code has disabled
- * interrupts and then trapped. Enabling interrupts
- * now is wrong, but it is better than running with
- * interrupts disabled until they are accidentally
- * enabled later.
- */
- if (TRAPF_USERMODE(frame) &&
- (curpcb->pcb_flags & PCB_VM86CALL) == 0)
- uprintf(
- "pid %ld (%s): trap %d with interrupts disabled\n",
- (long)curproc->p_pid, curthread->td_name, type);
- else if (type != T_NMI && type != T_BPTFLT &&
- type != T_TRCTRAP &&
- frame->tf_eip != (int)cpu_switch_load_gs) {
- /*
- * XXX not quite right, since this may be for a
- * multiple fault in user mode.
- */
- printf("kernel trap %d with interrupts disabled\n",
- type);
- /*
- * Page faults need interrupts disabled until later,
- * and we shouldn't enable interrupts while holding
- * a spin lock.
- */
- if (type != T_PAGEFLT &&
- td->td_md.md_spinlock_count == 0)
- enable_intr();
- }
- }
- eva = 0;
- if (type == T_PAGEFLT) {
- /*
- * For some Cyrix CPUs, %cr2 is clobbered by
- * interrupts. This problem is worked around by using
- * an interrupt gate for the pagefault handler. We
- * are finally ready to read %cr2 and conditionally
- * reenable interrupts. If we hold a spin lock, then
- * we must not reenable interrupts. This might be a
- * spurious page fault.
- */
+ /*
+ * We must not allow context switches until %cr2 is read.
+ * Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts.
+ * All faults use interrupt gates, so %cr2 can be safely read
+ * now, before optional enable of the interrupts below.
+ */
+ if (type == T_PAGEFLT)
eva = rcr2();
- if (td->td_md.md_spinlock_count == 0)
- enable_intr();
- }
+
+ /*
+ * Buggy application or kernel code has disabled interrupts
+ * and then trapped. Enabling interrupts now is wrong, but it
+ * is better than running with interrupts disabled until they
+ * are accidentally enabled later.
+ */
+ if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) &&
+ (curpcb->pcb_flags & PCB_VM86CALL) == 0)
+ uprintf("pid %ld (%s): trap %d with interrupts disabled\n",
+ (long)curproc->p_pid, curthread->td_name, type);
+
+ /*
+ * Conditionally reenable interrupts. If we hold a spin lock,
+ * then we must not reenable interrupts. This might be a
+ * spurious page fault.
+ */
+ if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 &&
+ frame->tf_eip != (int)cpu_switch_load_gs)
+ enable_intr();
if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) {
/* user trap */
@@ -583,24 +583,40 @@
* problem here and not have to check all the
* selectors and pointers when the user changes
* them.
+ *
+ * N.B. Compared to long mode, 32-bit mode
+ * does not push %esp on the trap frame,
+ * because iretl faulted while in ring 0. As
+ * a consequence, there is no need to fix up
+ * the stack pointer for doreti_iret_fault:
+ * the fixup and the complementary trap() call
+ * are executed on the main thread stack, not
+ * on the trampoline stack.
*/
- if (frame->tf_eip == (int)doreti_iret) {
- frame->tf_eip = (int)doreti_iret_fault;
+ if (frame->tf_eip == (int)doreti_iret + setidt_disp) {
+ frame->tf_eip = (int)doreti_iret_fault +
+ setidt_disp;
return;
}
if (type == T_STKFLT)
break;
- if (frame->tf_eip == (int)doreti_popl_ds) {
- frame->tf_eip = (int)doreti_popl_ds_fault;
+ if (frame->tf_eip == (int)doreti_popl_ds +
+ setidt_disp) {
+ frame->tf_eip = (int)doreti_popl_ds_fault +
+ setidt_disp;
return;
}
- if (frame->tf_eip == (int)doreti_popl_es) {
- frame->tf_eip = (int)doreti_popl_es_fault;
+ if (frame->tf_eip == (int)doreti_popl_es +
+ setidt_disp) {
+ frame->tf_eip = (int)doreti_popl_es_fault +
+ setidt_disp;
return;
}
- if (frame->tf_eip == (int)doreti_popl_fs) {
- frame->tf_eip = (int)doreti_popl_fs_fault;
+ if (frame->tf_eip == (int)doreti_popl_fs +
+ setidt_disp) {
+ frame->tf_eip = (int)doreti_popl_fs_fault +
+ setidt_disp;
return;
}
if (curpcb->pcb_onfault != NULL) {
@@ -627,23 +643,6 @@
case T_TRCTRAP: /* trace trap */
kernel_trctrap:
- if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
- /*
- * We've just entered system mode via the
- * syscall lcall. Continue single stepping
- * silently until the syscall handler has
- * saved the flags.
- */
- return;
- }
- if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
- /*
- * The syscall handler has now saved the
- * flags. Stop single stepping it.
- */
- frame->tf_eflags &= ~PSL_T;
- return;
- }
/*
* Ignore debug register trace traps due to
* accesses in the user's address space, which
@@ -711,10 +710,11 @@
ksi.ksi_trapno = type;
if (uprintf_signal) {
uprintf("pid %d comm %s: signal %d err %x code %d type %d "
- "addr 0x%x esp 0x%08x eip 0x%08x "
+ "addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x "
"<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type,
- addr, frame->tf_esp, frame->tf_eip,
+ addr, frame->tf_ss, frame->tf_esp, frame->tf_cs,
+ frame->tf_eip,
fubyte((void *)(frame->tf_eip + 0)),
fubyte((void *)(frame->tf_eip + 1)),
fubyte((void *)(frame->tf_eip + 2)),
@@ -791,7 +791,7 @@
}
}
va = trunc_page(eva);
- if (va >= KERNBASE) {
+ if (va >= PMAP_TRM_MIN_ADDRESS) {
/*
* Don't allow user-mode faults in kernel address space.
* An exception: if the faulting address is the invalid
@@ -806,20 +806,17 @@
#endif
if (usermode)
return (SIGSEGV);
-
- map = kernel_map;
+ trap_fatal(frame, eva);
+ return (-1);
} else {
- map = &p->p_vmspace->vm_map;
+ map = usermode ? &p->p_vmspace->vm_map : kernel_map;
/*
- * When accessing a user-space address, kernel must be
- * ready to accept the page fault, and provide a
- * handling routine. Since accessing the address
- * without the handler is a bug, do not try to handle
- * it normally, and panic immediately.
+ * The kernel cannot access a user-space address
+ * directly, because user pages are not mapped into
+ * the kernel address space. Also, page faults must
+ * not occur during interrupt handling.
*/
- if (!usermode && (td->td_intr_nesting_level != 0 ||
- curpcb->pcb_onfault == NULL)) {
+ if (!usermode && td->td_intr_nesting_level != 0) {
trap_fatal(frame, eva);
return (-1);
}
@@ -882,17 +879,12 @@
int code, ss, esp;
u_int type;
struct soft_segment_descriptor softseg;
- char *msg;
code = frame->tf_err;
type = frame->tf_trapno;
sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
- if (type <= MAX_TRAP_MSG)
- msg = trap_msg[type];
- else
- msg = "UNKNOWN";
- printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
+ printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type),
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
@@ -955,8 +947,8 @@
}
#endif
printf("trap number = %d\n", type);
- if (type <= MAX_TRAP_MSG)
- panic("%s", trap_msg[type]);
+ if (trap_msg(type) != NULL)
+ panic("%s", trap_msg(type));
else
panic("unknown/reserved trap");
}
@@ -974,16 +966,16 @@
* of this is that "trace <ebp>" in ddb won't work.
*/
void
-dblfault_handler()
+dblfault_handler(void)
{
#ifdef KDTRACE_HOOKS
if (dtrace_doubletrap_func != NULL)
(*dtrace_doubletrap_func)();
#endif
printf("\nFatal double fault:\n");
- printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
- printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
- printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
+ printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip);
+ printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp);
+ printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp);
#ifdef SMP
/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", PCPU_GET(cpuid));
@@ -1001,13 +993,42 @@
caddr_t params;
long tmp;
int error;
+#ifdef COMPAT_43
+ u_int32_t eip;
+ int cs;
+#endif
p = td->td_proc;
frame = td->td_frame;
sa = &td->td_sa;
- params = (caddr_t)frame->tf_esp + sizeof(int);
+#ifdef COMPAT_43
+ if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) {
+ /*
+ * In lcall $7,$0 after int $0x80. Convert the user
+ * frame to what it would be for a direct int 0x80 instead
+ * of lcall $7,$0, by popping the lcall return address.
+ */
+ error = fueword32((void *)frame->tf_esp, &eip);
+ if (error == -1)
+ return (EFAULT);
+ cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t)));
+ if (cs == -1)
+ return (EFAULT);
+
+ /*
+ * Unwind in-kernel frame after all stack frame pieces
+ * were successfully read.
+ */
+ frame->tf_eip = eip;
+ frame->tf_cs = cs;
+ frame->tf_esp += 2 * sizeof(u_int32_t);
+ frame->tf_err = 7; /* size of lcall $7,$0 */
+ }
+#endif
+
sa->code = frame->tf_eax;
+ params = (caddr_t)frame->tf_esp + sizeof(uint32_t);
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1020,7 +1041,7 @@
if (error == -1)
return (EFAULT);
sa->code = tmp;
- params += sizeof(int);
+ params += sizeof(uint32_t);
} else if (sa->code == SYS___syscall) {
/*
* Like syscall, but code is a quad, so as to maintain
@@ -1043,7 +1064,7 @@
if (params != NULL && sa->narg != 0)
error = copyin(params, (caddr_t)sa->args,
- (u_int)(sa->narg * sizeof(int)));
+ (u_int)(sa->narg * sizeof(uint32_t)));
else
error = 0;
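[A sketch of the relocation arithmetic applied by hand in the hunks above: the trampoline text executes at its link-time address plus setidt_disp, so every comparison or fixup of a trampoline %eip must add the displacement. The helper name is illustrative only:]

	/* hypothetical helper: linked address -> runtime address */
	static __inline uintptr_t
	tramp_reloc(uintptr_t linked_va)
	{
		return (linked_va + setidt_disp);
	}

	if (frame->tf_eip == tramp_reloc((uintptr_t)doreti_iret))
		frame->tf_eip = tramp_reloc((uintptr_t)doreti_iret_fault);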
Index: sys/i386/i386/vm86.c
===================================================================
--- sys/i386/i386/vm86.c
+++ sys/i386/i386/vm86.c
@@ -78,6 +78,55 @@
#define PUSH_MASK ~(PSL_VM | PSL_RF | PSL_I)
#define POP_MASK ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
+static int
+vm86_suword16(volatile void *base, int word)
+{
+
+ if (curthread->td_critnest != 0) {
+ *(volatile uint16_t *)base = word;
+ return (0);
+ }
+ return (suword16(base, word));
+}
+
+static int
+vm86_suword(volatile void *base, long word)
+{
+
+ if (curthread->td_critnest != 0) {
+ *(volatile long *)base = word;
+ return (0);
+ }
+ return (suword(base, word));
+}
+
+static int
+vm86_fubyte(volatile const void *base)
+{
+
+ if (curthread->td_critnest != 0)
+ return (*(volatile const u_char *)base);
+ return (fubyte(base));
+}
+
+static int
+vm86_fuword16(volatile const void *base)
+{
+
+ if (curthread->td_critnest != 0)
+ return (*(volatile const uint16_t *)base);
+ return (fuword16(base));
+}
+
+static long
+vm86_fuword(volatile const void *base)
+{
+
+ if (curthread->td_critnest != 0)
+ return (*(volatile const long *)base);
+ return (fuword(base));
+}
+
static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
@@ -101,20 +150,20 @@
PUSH(u_short x, struct vm86frame *vmf)
{
vmf->vmf_sp -= 2;
- suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
+ vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}
static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
vmf->vmf_sp -= 4;
- suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
+ vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}
static __inline u_short
POP(struct vm86frame *vmf)
{
- u_short x = fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
+ u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
vmf->vmf_sp += 2;
return (x);
@@ -123,7 +172,7 @@
static __inline u_int
POPL(struct vm86frame *vmf)
{
- u_int x = fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
+ u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
vmf->vmf_sp += 4;
return (x);
@@ -152,16 +201,16 @@
retcode = SIGTRAP;
addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
- i_byte = fubyte(addr);
+ i_byte = vm86_fubyte(addr);
if (i_byte == ADDRESS_SIZE_PREFIX) {
- i_byte = fubyte(++addr);
+ i_byte = vm86_fubyte(++addr);
inc_ip++;
}
if (vm86->vm86_has_vme) {
switch (i_byte) {
case OPERAND_SIZE_PREFIX:
- i_byte = fubyte(++addr);
+ i_byte = vm86_fubyte(++addr);
inc_ip++;
switch (i_byte) {
case PUSHF:
@@ -241,7 +290,7 @@
switch (i_byte) {
case OPERAND_SIZE_PREFIX:
- i_byte = fubyte(++addr);
+ i_byte = vm86_fubyte(++addr);
inc_ip++;
switch (i_byte) {
case PUSHF:
@@ -293,7 +342,7 @@
return (retcode);
case INTn:
- i_byte = fubyte(addr + 1);
+ i_byte = vm86_fubyte(addr + 1);
if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
break;
if (vm86->vm86_eflags & PSL_VIF)
@@ -303,7 +352,7 @@
PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
PUSH(vmf->vmf_cs, vmf);
PUSH(vmf->vmf_ip + inc_ip + 1, vmf); /* increment IP */
- GET_VEC(fuword((caddr_t)(i_byte * 4)),
+ GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
&vmf->vmf_cs, &vmf->vmf_ip);
vmf->vmf_flags &= ~PSL_T;
vm86->vm86_eflags &= ~PSL_VIF;
@@ -548,6 +597,7 @@
void
vm86_trap(struct vm86frame *vmf)
{
+ void (*p)(struct vm86frame *);
caddr_t addr;
/* "should not happen" */
@@ -560,21 +610,26 @@
else
vmf->vmf_trapno = vmf->vmf_trapno << 16;
- vm86_biosret(vmf);
+ p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
+ setidt_disp);
+ p(vmf);
}
int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
+ int (*p)(struct vm86frame *);
int retval;
if (intnum < 0 || intnum > 0xff)
return (EINVAL);
vmf->vmf_trapno = intnum;
+ p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
+ setidt_disp);
mtx_lock(&vm86_lock);
critical_enter();
- retval = vm86_bioscall(vmf);
+ retval = p(vmf);
critical_exit();
mtx_unlock(&vm86_lock);
return (retval);
@@ -589,10 +644,12 @@
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
- pt_entry_t *pte = (pt_entry_t *)vm86paddr;
+ pt_entry_t *pte;
+ int (*p)(struct vm86frame *);
vm_paddr_t page;
int i, entry, retval;
+ pte = (pt_entry_t *)vm86paddr;
mtx_lock(&vm86_lock);
for (i = 0; i < vmc->npages; i++) {
page = vtophys(vmc->pmap[i].kva & PG_FRAME);
@@ -603,8 +660,10 @@
}
vmf->vmf_trapno = intnum;
+ p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
+ setidt_disp);
critical_enter();
- retval = vm86_bioscall(vmf);
+ retval = p(vmf);
critical_exit();
for (i = 0; i < vmc->npages; i++) {
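[For context, a minimal sketch of a BIOS interrupt call through this interface; the setidt_disp-relocated vm86_bioscall pointer is taken inside vm86_intcall(), so existing callers stay unchanged. Int 0x15/AH=88h is used only as a familiar example:]

	struct vm86frame vmf;

	bzero(&vmf, sizeof(vmf));
	vmf.vmf_ah = 0x88;		/* get extended memory size */
	if (vm86_intcall(0x15, &vmf) == 0)
		printf("extended memory: %u KB\n", vmf.vmf_ax);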
Index: sys/i386/i386/vm86bios.s
===================================================================
--- sys/i386/i386/vm86bios.s
+++ sys/i386/i386/vm86bios.s
@@ -100,9 +100,8 @@
movl %cr3,%eax
pushl %eax /* save address space */
- movl IdlePTD,%ecx
+ movl IdlePTD,%ecx /* va (and pa) of Idle PTD */
movl %ecx,%ebx
- addl $KERNBASE,%ebx /* va of Idle PTD */
movl 0(%ebx),%eax
pushl %eax /* old ptde != 0 when booting */
pushl %ebx /* keep for reuse */
@@ -119,7 +118,8 @@
movl SCR_VMFRAME(%edx),%esp /* switch to new stack */
pushl %esp
- call vm86_prepcall /* finish setup */
+ movl $vm86_prepcall, %eax
+ call *%eax /* finish setup */
add $4, %esp
/*
Index: sys/i386/i386/vm_machdep.c
===================================================================
--- sys/i386/i386/vm_machdep.c
+++ sys/i386/i386/vm_machdep.c
@@ -204,9 +204,11 @@
* Create a new fresh stack for the new process.
* Copy the trap frame for the return to user mode as if from a
* syscall. This copies most of the user mode register values.
- * The -16 is so we can expand the trapframe if we go to vm86.
+ * The -VM86_STACK_SPACE (-16) is so we can expand the trapframe
+ * if we go to vm86.
*/
- td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1;
+ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb -
+ VM86_STACK_SPACE) - 1;
bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
td2->td_frame->tf_eax = 0; /* Child returns zero */
@@ -238,7 +240,7 @@
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
- pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
/*-
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
@@ -344,8 +346,7 @@
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
*/
- kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext,
- ctob(IOPAGES + 1));
+ pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1));
pcb->pcb_ext = NULL;
}
}
@@ -367,7 +368,8 @@
struct xstate_hdr *xhdr;
td->td_pcb = pcb = get_pcb_td(td);
- td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+ td->td_frame = (struct trapframe *)((caddr_t)pcb -
+ VM86_STACK_SPACE) - 1;
pcb->pcb_ext = NULL;
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
if (use_xsave) {
@@ -462,7 +464,7 @@
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
pcb2->pcb_ebx = (int)td; /* trampoline arg */
- pcb2->pcb_eip = (int)fork_trampoline;
+ pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
pcb2->pcb_gs = rgs();
/*
* If we didn't copy the pcb, we'd need to do the following registers:
@@ -581,7 +583,7 @@
*/
ptep = vtopte(sf->kva);
opte = *ptep;
- *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
+ *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
/*
Index: sys/i386/include/asmacros.h
===================================================================
--- sys/i386/include/asmacros.h
+++ sys/i386/include/asmacros.h
@@ -1,3 +1,4 @@
+/* -*- mode: asm -*- */
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
@@ -135,6 +136,10 @@
#endif /* GPROF */
#ifdef LOCORE
+
+#define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */
+#define SEL_RPL_MASK 0x0003
+
/*
* Convenience macro for declaring interrupt entry points.
*/
@@ -144,16 +149,21 @@
/*
* Macros to create and destroy a trap frame.
*/
-#define PUSH_FRAME \
- pushl $0 ; /* dummy error code */ \
- pushl $0 ; /* dummy trap type */ \
- pushal ; /* 8 ints */ \
- pushl $0 ; /* save data and extra segments ... */ \
- movw %ds,(%esp) ; \
- pushl $0 ; \
- movw %es,(%esp) ; \
- pushl $0 ; \
+ .macro PUSH_FRAME2
+ pushal
+ pushl $0
+ movw %ds,(%esp)
+ pushl $0
+ movw %es,(%esp)
+ pushl $0
movw %fs,(%esp)
+ .endm
+
+ .macro PUSH_FRAME
+ pushl $0 /* dummy error code */
+ pushl $0 /* dummy trap type */
+ PUSH_FRAME2
+ .endm
/*
* Access per-CPU data.
@@ -167,12 +177,43 @@
/*
* Setup the kernel segment registers.
*/
-#define SET_KERNEL_SREGS \
- movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
- movl %eax, %ds ; \
- movl %eax, %es ; \
- movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
+ .macro SET_KERNEL_SREGS
+ movl $KDSEL, %eax /* reload with kernel's data segment */
+ movl %eax, %ds
+ movl %eax, %es
+ movl $KPSEL, %eax /* reload with per-CPU data segment */
movl %eax, %fs
+ .endm
+
+ .macro NMOVE_STACKS
+ movl PCPU(KESP0), %edx
+ movl $TF_SZ, %ecx
+ testl $PSL_VM, TF_EFLAGS(%esp)
+ jz 1001f
+ addl $(4*4), %ecx
+1001: subl %ecx, %edx
+ movl %edx, %edi
+ movl %esp, %esi
+ rep; movsb
+ movl %edx, %esp
+ .endm
+
+ .macro MOVE_STACKS
+ call 1000f
+1000: popl %eax
+ movl (tramp_idleptd - 1000b)(%eax), %eax
+ movl %eax, %cr3
+ NMOVE_STACKS
+ .endm
+
+ .macro KENTER
+ testl $PSL_VM, TF_EFLAGS(%esp)
+ jnz 2f
+ testb $SEL_RPL_MASK, TF_CS(%esp)
+ jz 2f
+1: MOVE_STACKS
+2:
+ .endm
#endif /* LOCORE */
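[A pseudo-C rendering of the KENTER path above, using the assembler-level names (TF_SZ, tramp_idleptd) as if they were C symbols; this is a sketch of the control flow, not compilable kernel code:]

	/* entered from user or vm86 mode: switch page tables and stacks */
	if ((tf->tf_eflags & PSL_VM) != 0 ||
	    (tf->tf_cs & SEL_RPL_MASK) != 0) {
		load_cr3(tramp_idleptd);	/* leave the trampoline PTD */
		sz = TF_SZ + ((tf->tf_eflags & PSL_VM) != 0 ? 4 * 4 : 0);
		bcopy(tf, (char *)PCPU_GET(kesp0) - sz, sz);
		/* execution continues on the thread's kernel stack */
	}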
Index: sys/i386/include/frame.h
===================================================================
--- sys/i386/include/frame.h
+++ sys/i386/include/frame.h
@@ -41,4 +41,8 @@
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
+#define TRAMP_STACK_SZ 4096
+#define TRAMP_COPYOUT_SZ 128
+#define VM86_STACK_SPACE 16
+
#endif /* _I386_FRAME_H_ */
Index: sys/i386/include/md_var.h
===================================================================
--- sys/i386/include/md_var.h
+++ sys/i386/include/md_var.h
@@ -45,14 +45,18 @@
#endif
#ifdef COMPAT_43
extern int szosigcode;
+extern int sz_lcall_tramp;
#endif
extern uint32_t *vm_page_dump;
+extern vm_offset_t proc0kstack;
+extern uintptr_t setidt_disp;
struct segment_descriptor;
union savefpu;
void bcopyb(const void *from, void *to, size_t len);
void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
+void copyout_init_tramp(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds));
@@ -71,6 +75,7 @@
void set_fsbase(struct thread *td, uint32_t base);
void set_gsbase(struct thread *td, uint32_t base);
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
+void setidt_nodisp(int idx, uintptr_t func, int typ, int dpl, int selec);
union savefpu *get_pcb_user_save_td(struct thread *td);
union savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
Index: sys/i386/include/param.h
===================================================================
--- sys/i386/include/param.h
+++ sys/i386/include/param.h
@@ -164,7 +164,6 @@
#define pgtok(x) ((x) * (PAGE_SIZE / 1024))
-#define INKERNEL(va) (((vm_offset_t)(va)) >= VM_MAXUSER_ADDRESS && \
- ((vm_offset_t)(va)) < VM_MAX_KERNEL_ADDRESS)
+#define INKERNEL(va) (TRUE)
#endif /* !_I386_INCLUDE_PARAM_H_ */
Index: sys/i386/include/pc/bios.h
===================================================================
--- sys/i386/include/pc/bios.h
+++ sys/i386/include/pc/bios.h
@@ -267,8 +267,8 @@
};
#ifdef _KERNEL
-#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE)
-#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE)
+#define BIOS_PADDRTOVADDR(x) ((x) + PMAP_MAP_LOW)
+#define BIOS_VADDRTOPADDR(x) ((x) - PMAP_MAP_LOW)
struct bios_oem_signature {
char * anchor; /* search anchor string in BIOS memory */
Index: sys/i386/include/pcpu.h
===================================================================
--- sys/i386/include/pcpu.h
+++ sys/i386/include/pcpu.h
@@ -42,21 +42,23 @@
#include <sys/_mutex.h>
/*
- * The SMP parts are setup in pmap.c and locore.s for the BSP, and
- * mp_machdep.c sets up the data for the AP's to "see" when they awake.
- * The reason for doing it via a struct is so that an array of pointers
- * to each CPU's data can be set up for things like "check curproc on all
- * other processors"
+ * The SMP parts are set up in pmap.c and machdep.c for the BSP, and
+ * pmap.c and mp_machdep.c set up the data for the APs to "see" when
+ * they awake. The reason for doing it via a struct is so that an
+ * array of pointers to each CPU's data can be set up for things like
+ * "check curproc on all other processors"
*/
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
struct pmap *pc_curpmap; \
- struct i386tss pc_common_tss; \
struct segment_descriptor pc_common_tssd; \
struct segment_descriptor *pc_tss_gdt; \
struct segment_descriptor *pc_fsgs_gdt; \
+ struct i386tss *pc_common_tssp; \
+ u_int pc_kesp0; \
+ u_int pc_trampstk; \
int pc_currentldt; \
u_int pc_acpi_id; /* ACPI CPU id */ \
u_int pc_apic_id; \
@@ -69,8 +71,13 @@
caddr_t pc_cmap_addr1; \
caddr_t pc_cmap_addr2; \
vm_offset_t pc_qmap_addr; /* KVA for temporary mappings */\
+ vm_offset_t pc_copyout_maddr; \
+ vm_offset_t pc_copyout_saddr; \
+ struct mtx pc_copyout_mlock; \
+ struct sx pc_copyout_slock; \
+ char *pc_copyout_buf; \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
- char __pad[445]
+ char __pad[550]
#ifdef _KERNEL
Index: sys/i386/include/pmap.h
===================================================================
--- sys/i386/include/pmap.h
+++ sys/i386/include/pmap.h
@@ -112,12 +112,10 @@
* For PAE, the page table page unit size is 2MB. This means that 512 pages
* is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE.
*/
-#ifndef KVA_PAGES
#if defined(PAE) || defined(PAE_TABLES)
-#define KVA_PAGES 512
+#define KVA_PAGES (512*4)
#else
-#define KVA_PAGES 256
-#endif
+#define KVA_PAGES (256*4)
#endif
/*
@@ -150,12 +148,13 @@
/*
* The *PTDI values control the layout of virtual memory
- *
- * XXX This works for now, but I am not real happy with it, I'll fix it
- * right after I fix locore.s and the magic 28K hole
*/
-#define KPTDI (NPDEPTD-NKPDE) /* start of kernel virtual pde's */
-#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */
+#define KPTDI 0 /* start of kernel virtual pde's */
+#define LOWPTDI 1 /* low memory map pde */
+#define KERNPTDI 2 /* start of kernel text pde */
+#define PTDPTDI (NPDEPTD - 1 - NPGPTD) /* ptd entry that points
+ to ptd! */
+#define TRPTDI (NPDEPTD - 1) /* u/k trampoline ptd */
/*
* XXX doesn't really belong here I guess...
@@ -311,6 +310,7 @@
table */
#endif
struct vm_radix pm_root; /* spare page table pages */
+ vm_page_t pm_ptdpg[NPGPTD];
};
typedef struct pmap *pmap_t;
@@ -396,6 +396,8 @@
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
+void *pmap_trm_alloc(size_t size, int flags);
+void pmap_trm_free(void *addr, size_t size);
void invltlb_glob(void);
Index: sys/i386/include/segments.h
===================================================================
--- sys/i386/include/segments.h
+++ sys/i386/include/segments.h
@@ -84,11 +84,10 @@
#ifdef _KERNEL
extern int _default_ldt;
-extern union descriptor gdt[];
-extern union descriptor ldt[NLDT];
+extern union descriptor *gdt;
+extern union descriptor *ldt;
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
-extern struct region_descriptor r_gdt, r_idt;
void lgdt(struct region_descriptor *rdp);
void sdtossd(struct segment_descriptor *sdp,
Index: sys/i386/include/vmparam.h
===================================================================
--- sys/i386/include/vmparam.h
+++ sys/i386/include/vmparam.h
@@ -136,7 +136,7 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
-#define KERNLOAD (1 << PDRSHIFT)
+#define KERNLOAD (KERNPTDI << PDRSHIFT)
#endif /* !defined(KERNLOAD) */
/*
@@ -146,23 +146,47 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
-#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
+#define VM_MAX_KERNEL_ADDRESS VADDR(PTDPTDI, 0)
-#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
+#define VM_MIN_KERNEL_ADDRESS 0
-#define KERNBASE VADDR(KPTDI, 0)
+#define KERNBASE KERNLOAD
#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
-#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
+#define VM_MAXUSER_ADDRESS VADDR(TRPTDI, 0)
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#define USRSTACK SHAREDPAGE
-#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
+#define VM_MAX_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MIN_ADDRESS ((vm_offset_t)0)
+#define PMAP_TRM_MIN_ADDRESS VM_MAXUSER_ADDRESS
+#define PMAP_TRM_MAX_ADDRESS 0xffffffff
+
+#define PMAP_MAP_LOW VADDR(LOWPTDI, 0)
+
+/*
+ * KVA layout. The unit of the system allocation is single PDE, which
+ * represents NBPDR bytes, aligned to NBPDR. NBPDR is 4M for non-PAE
+ * page tables, and 2M for PAE. Addresses below are shown for non-PAE.
+ *
+ * 0x00000000 - 0x003fffff Transient identity map of low memory (0-4M),
+ * normally disabled to catch NULL derefs.
+ * 0x00400000 - 0x007fffff Fixed mapping of the low memory (0-4M).
+ * 0x00800000 - 0xffbfffff KERNBASE (VA) == KERNLOAD (PA), kernel
+ * text + data and all kernel maps. Managed
+ * by MI VM.
+ * 0xffc00000 - 0xffdfffff Recursive kernel page table mapping, pointed
+ * to by PTmap. PTD[] recursively points
+ * into PTmap.
+ * 0xffe00000 - 0xffffffff Kernel/User mode shared PDE, contains GDT,
+ * IDT, TSS, LDT, trampoline code and stacks.
+ * Managed by pmap_trm_alloc().
+ */
+
/*
* How many physical pages per kmem arena virtual page.
*/
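
The new PDE indices from pmap.h line up with this layout through the VADDR() macro; restated standalone for the non-PAE case (PDRSHIFT 22, 1024 PDEs), with PDE_VA() a simplified stand-in for VADDR(pdi, 0):

#include <stdio.h>

#define PDRSHIFT        22
#define PDE_VA(pdi)     ((unsigned)(pdi) << PDRSHIFT)

int
main(void)
{
        printf("LOWPTDI  %#010x\n", PDE_VA(1));    /* 0x00400000, low map */
        printf("KERNPTDI %#010x\n", PDE_VA(2));    /* 0x00800000, KERNBASE */
        printf("TRPTDI   %#010x\n", PDE_VA(1023)); /* 0xffc00000, trampoline
                                                      PDE, and the new
                                                      PMAP_TRM_MIN_ADDRESS */
        return (0);
}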
Index: sys/kern/imgact_aout.c
===================================================================
--- sys/kern/imgact_aout.c
+++ sys/kern/imgact_aout.c
@@ -67,7 +67,12 @@
static int exec_aout_imgact(struct image_params *imgp);
static int aout_fixup(register_t **stack_base, struct image_params *imgp);
+#define AOUT32_USRSTACK 0xbfc00000
+
#if defined(__i386__)
+
+#define AOUT32_PS_STRINGS (AOUT32_USRSTACK - sizeof(struct ps_strings))
+
struct sysentvec aout_sysvec = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
@@ -85,9 +90,9 @@
.sv_minsigstksz = MINSIGSTKSZ,
.sv_pagesize = PAGE_SIZE,
.sv_minuser = VM_MIN_ADDRESS,
- .sv_maxuser = VM_MAXUSER_ADDRESS,
- .sv_usrstack = USRSTACK,
- .sv_psstrings = PS_STRINGS,
+ .sv_maxuser = AOUT32_USRSTACK,
+ .sv_usrstack = AOUT32_USRSTACK,
+ .sv_psstrings = AOUT32_PS_STRINGS,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_strings = exec_copyout_strings,
.sv_setregs = exec_setregs,
@@ -104,10 +109,9 @@
#elif defined(__amd64__)
-#define AOUT32_USRSTACK 0xbfc00000
#define AOUT32_PS_STRINGS \
(AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
-#define AOUT32_MINUSER FREEBSD32_MINUSER
+#define AOUT32_MINUSER FREEBSD32_MINUSER
extern const char *freebsd32_syscallnames[];
extern u_long ia32_maxssiz;
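
For a.out images the user limit stays pinned at the historical 3/1-split value on both arches, even though the native VM_MAXUSER_ADDRESS now reaches the trampoline PDE; legacy binaries keep seeing the stack top they were built for. The constant is exactly the old limit, PDE 767:

#include <assert.h>

int
main(void)
{
        /* 0xbfc00000 is the old 3/1-split VM_MAXUSER_ADDRESS. */
        assert((767u << 22) == 0xbfc00000u);
        return (0);
}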
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -480,7 +480,9 @@
static unsigned int w_generation = 0;
static const char w_notrunning[] = "Witness not running\n";
static const char w_stillcold[] = "Witness is still cold\n";
-
+#ifdef __i386__
+static const char w_notallowed[] = "The sysctl is disabled on this architecture\n";
+#endif
static struct witness_order_list_entry order_lists[] = {
/*
@@ -2779,6 +2781,11 @@
struct sbuf *sb;
int error;
+#ifdef __i386__
+ error = SYSCTL_OUT(req, w_notallowed, sizeof(w_notallowed));
+ return (error);
+#endif
+
if (witness_watch < 1) {
error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
return (error);
Index: sys/x86/acpica/acpi_wakeup.c
===================================================================
--- sys/x86/acpica/acpi_wakeup.c
+++ sys/x86/acpica/acpi_wakeup.c
@@ -141,8 +141,13 @@
}
#define WARMBOOT_TARGET 0
+#ifdef __amd64__
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)
+#else /* __i386__ */
+#define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467)
+#define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469)
+#endif
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
@@ -186,7 +191,7 @@
* cpususpend_handler() and we will release them soon. Then each
* will invalidate its TLB.
*/
- kernel_pmap->pm_pdir[0] = 0;
+ PTD[KPTDI] = 0;
invltlb_glob();
#endif
@@ -256,7 +261,7 @@
* be careful to use the kernel map (PTD[0] is for curthread
* which may be a user thread in deprecated APIs).
*/
- kernel_pmap->pm_pdir[0] = PTD[KPTDI];
+ PTD[KPTDI] = PTD[LOWPTDI];
#endif
/* Call ACPICA to enter the desired sleep state */
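
Both acpi_wakeup changes follow from the new layout: the warm-boot vector in the BIOS data area is reached through the low map rather than KERNBASE, and the transient identity mapping at PDE 0 is torn down and later repopulated from the fixed low map (LOWPTDI) around the sleep transition. A hedged, kernel-context sketch of the warm-boot store, with the low-map base hard-coded to its non-PAE value (not the diff's code):

#include <stdint.h>

#define PMAP_MAP_LOW_SKETCH     0x00400000u

/* Point the BIOS warm-boot vector (seg:off at physical 0x469:0x467)
 * at the wakeup code, going through the low-memory window. */
static void
set_warmboot_vector(uint16_t seg, uint16_t off)
{
        *(volatile uint16_t *)(uintptr_t)(PMAP_MAP_LOW_SKETCH + 0x0467) = off;
        *(volatile uint16_t *)(uintptr_t)(PMAP_MAP_LOW_SKETCH + 0x0469) = seg;
}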
Index: sys/x86/x86/local_apic.c
===================================================================
--- sys/x86/x86/local_apic.c
+++ sys/x86/x86/local_apic.c
@@ -78,11 +78,9 @@
#ifdef __amd64__
#define SDT_APIC SDT_SYSIGT
-#define SDT_APICT SDT_SYSIGT
#define GSEL_APIC 0
#else
#define SDT_APIC SDT_SYS386IGT
-#define SDT_APICT SDT_SYS386TGT
#define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL)
#endif
@@ -517,7 +515,7 @@
/* Local APIC CMCI. */
setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
- SDT_APICT, SEL_KPL, GSEL_APIC);
+ SDT_APIC, SEL_KPL, GSEL_APIC);
if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
arat = 0;
@@ -1605,7 +1603,7 @@
* We can not currently clear the idt entry because other cpus
* may have a valid vector at this offset.
*/
- setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
+ setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
SEL_KPL, GSEL_APIC);
#endif
}
@@ -2146,7 +2144,7 @@
KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
func != (uintptr_t)&IDTVEC(rsvd_pti),
("invalid idtfunc %#lx", func));
- setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
+ setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
SEL_KPL, GSEL_APIC);
mtx_unlock_spin(&icu_lock);
}
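
Dropping SDT_APICT means APIC vectors now always use interrupt gates. The likely motivation is the new trampoline: an interrupt gate clears IF on entry, so a handler cannot be re-entered before it has switched off the shared user/kernel trampoline stack, while a trap gate would leave interrupts enabled. The underlying IA-32 gate type encodings, for reference:

/* IA-32 gate type fields (low 4 bits of the descriptor access byte). */
#define SDT_SYS386IGT   14      /* 32-bit interrupt gate: CPU clears IF */
#define SDT_SYS386TGT   15      /* 32-bit trap gate: IF left unchanged */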
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -1686,8 +1686,10 @@
generation = smp_tlb_generation;
if (smp_tlb_pmap == kernel_pmap)
invltlb_glob();
+#ifdef __amd64__
else
invltlb();
+#endif
PCPU_SET(smp_tlb_done, generation);
}
@@ -1704,7 +1706,10 @@
#endif /* COUNT_IPIS */
generation = smp_tlb_generation; /* Overlap with serialization */
- invlpg(smp_tlb_addr1);
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ invlpg(smp_tlb_addr1);
PCPU_SET(smp_tlb_done, generation);
}
@@ -1724,10 +1729,13 @@
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
generation = smp_tlb_generation; /* Overlap with serialization */
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < addr2);
+#ifdef __i386__
+ if (smp_tlb_pmap == kernel_pmap)
+#endif
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < addr2);
PCPU_SET(smp_tlb_done, generation);
}
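
The i386 shootdown handlers now skip invlpg for user pmaps, presumably because the IPI arrives while the CPU runs on kernel page tables, where user VAs are not mapped, and stale user TLB entries are flushed anyway by the CR3 reload on the return-to-user trampoline. A reduced sketch of the resulting logic (handler wiring and the smp_tlb_* globals elided):

/* Sketch: only kernel-pmap requests still need an explicit invlpg. */
static void
invlpg_handler_sketch(struct pmap *req_pmap, vm_offset_t va)
{
        if (req_pmap == kernel_pmap)
                invlpg(va);
        /* else: stale user entries die on the next CR3 reload. */
}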
Index: sys/x86/x86/mptable.c
===================================================================
--- sys/x86/x86/mptable.c
+++ sys/x86/x86/mptable.c
@@ -221,8 +221,13 @@
search_for_sig(u_int32_t target, int count)
{
int x;
- u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
+ u_int32_t *addr;
+#ifdef __amd64__
+ addr = (u_int32_t *) (KERNBASE + target);
+#else /* __i386__ */
+ addr = (u_int32_t *) (PMAP_MAP_LOW + target);
+#endif
for (x = 0; x < count; x += 4)
if (addr[x] == MP_SIG)
/* make array index a byte index */
@@ -253,7 +258,13 @@
u_int32_t target;
/* see if EBDA exists */
- if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
+ if ((segment = (u_long) * (u_short *) (
+#ifdef __amd64__
+ KERNBASE
+#else /* __i386__ */
+ PMAP_MAP_LOW
+#endif
+ + 0x40e)) != 0) {
/* search first 1K of EBDA */
target = (u_int32_t) (segment << 4);
if ((x = search_for_sig(target, 1024 / 4)) >= 0)
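
A worked example of the EBDA probe above: the BIOS stores the EBDA real-mode segment in the 16-bit word at physical 0x40e; shifting it left by four yields the physical base, which search_for_sig() then scans through the same low-memory window. With an illustrative segment value of 0x9fc0:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint16_t segment = 0x9fc0;              /* illustrative BDA content */
        uint32_t target = (uint32_t)segment << 4;

        assert(target == 0x0009fc00);           /* EBDA physical base */
        return (0);
}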