D45471.diff

diff --git a/sys/riscv/include/md_var.h b/sys/riscv/include/md_var.h
--- a/sys/riscv/include/md_var.h
+++ b/sys/riscv/include/md_var.h
@@ -44,6 +44,7 @@
/* Supervisor-mode extension support */
extern bool has_sstc;
extern bool has_sscofpmf;
+extern bool has_svpbmt;
struct dumperinfo;
struct minidumpstate;
diff --git a/sys/riscv/include/pte.h b/sys/riscv/include/pte.h
--- a/sys/riscv/include/pte.h
+++ b/sys/riscv/include/pte.h
@@ -83,6 +83,25 @@
#define PTE_PROMOTE (PTE_V | PTE_RWX | PTE_D | PTE_G | PTE_U | \
PTE_SW_MANAGED | PTE_SW_WIRED)
+/*
+ * Svpbmt Memory Attribute (MA) bits [62:61].
+ *
+ * +------+-------+------------------------------------------------------------+
+ * | Mode | Value | Requested Memory Attributes |
+ * +------+-------+------------------------------------------------------------+
+ * | PMA | 00 | None, inherited from Physical Memory Attributes (firmware) |
+ * | NC | 01 | Non-cacheable, idempotent, weakly-ordered (RVWMO), |
+ * | | | main memory |
+ * | IO | 10 | Non-cacheable, non-idempotent, strongly-ordered, I/O |
+ * | -- | 11 | Reserved |
+ * +------+-------+------------------------------------------------------------+
+ */
+#define PTE_MA_SHIFT 61
+#define PTE_MA_MASK (0x3ul << PTE_MA_SHIFT)
+#define PTE_MA_NONE (0ul)
+#define PTE_MA_NC (1ul << PTE_MA_SHIFT)
+#define PTE_MA_IO (2ul << PTE_MA_SHIFT)
+
/* Bits 63 - 54 are reserved for future use. */
#define PTE_HI_MASK 0xFFC0000000000000ULL
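A minimal sketch of how the new MA field composes into a leaf PTE, assuming the PTE_KERN flags and PTE_PPN0_S shift defined elsewhere in this header; the helper itself is hypothetical and not part of the patch:

	/* Hypothetical helper: build a kernel PTE for 'pa' with Svpbmt MA bits. */
	static inline pt_entry_t
	make_kern_pte(vm_paddr_t pa, pt_entry_t ma)
	{
		KASSERT((ma & ~PTE_MA_MASK) == 0, ("stray bits in MA argument"));
		return (PTE_KERN | ma | ((pa / PAGE_SIZE) << PTE_PPN0_S));
	}

On a CPU without Svpbmt, passing PTE_MA_NONE (zero) yields exactly the PTE the pre-Svpbmt code would have produced.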
diff --git a/sys/riscv/include/vm.h b/sys/riscv/include/vm.h
--- a/sys/riscv/include/vm.h
+++ b/sys/riscv/include/vm.h
@@ -28,10 +28,14 @@
#define _MACHINE_VM_H_
/* Memory attribute configuration. */
-#define VM_MEMATTR_DEVICE 0
+#define VM_MEMATTR_PMA 0
#define VM_MEMATTR_UNCACHEABLE 1
-#define VM_MEMATTR_WRITE_BACK 2
+#define VM_MEMATTR_DEVICE 2
-#define VM_MEMATTR_DEFAULT VM_MEMATTR_WRITE_BACK
+#define VM_MEMATTR_WRITE_BACK VM_MEMATTR_PMA
+#define VM_MEMATTR_DEFAULT VM_MEMATTR_PMA
+
+#define VM_MEMATTR_LAST VM_MEMATTR_DEVICE
+#define VM_MEMATTR_TOTAL (VM_MEMATTR_LAST + 1)
#endif /* !_MACHINE_VM_H_ */
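VM_MEMATTR_LAST and VM_MEMATTR_TOTAL allow per-attribute tables to be sized and walked without hard-coding the count; the pmap.c hunk below declares memattr_bits[VM_MEMATTR_TOTAL] in exactly this way. A sketch of the iteration pattern (illustrative only, since memattr_bits is static to pmap.c):

	/* Illustrative: enumerate every defined memory attribute. */
	for (int i = 0; i < VM_MEMATTR_TOTAL; i++)
		printf("attr %d -> PTE bits %#lx\n", i, (u_long)memattr_bits[i]);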
diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c
--- a/sys/riscv/riscv/identcpu.c
+++ b/sys/riscv/riscv/identcpu.c
@@ -74,6 +74,7 @@
/* Supervisor-mode extension support. */
bool __read_frequently has_sstc;
bool __read_frequently has_sscofpmf;
+bool has_svpbmt;
struct cpu_desc {
const char *cpu_mvendor_name;
@@ -414,6 +415,7 @@
/* Supervisor-mode extension support. */
UPDATE_CAP(has_sstc, (desc->smode_extensions & SV_SSTC) != 0);
UPDATE_CAP(has_sscofpmf, (desc->smode_extensions & SV_SSCOFPMF) != 0);
+ UPDATE_CAP(has_svpbmt, (desc->smode_extensions & SV_SVPBMT) != 0);
#undef UPDATE_CAP
}
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -369,6 +369,8 @@
((((pte) & ~PTE_HI_MASK) >> PTE_PPN0_S) * PAGE_SIZE)
#define L2PTE_TO_PHYS(l2) \
((((l2) & ~PTE_HI_MASK) >> PTE_PPN1_S) << L2_SHIFT)
+#define L1PTE_TO_PHYS(l1) \
+ ((((l1) & ~PTE_HI_MASK) >> PTE_PPN2_S) << L1_SHIFT)
#define PTE_TO_VM_PAGE(pte) PHYS_TO_VM_PAGE(PTE_TO_PHYS(pte))
/*
@@ -533,6 +535,25 @@
mtx_unlock(&allpmaps_lock);
}
+/*
+ * Holds the PTE mode bits (defined in pte.h) controlling e.g. cacheability.
+ *
+ * The indices correspond to the VM_MEMATTR_* defines in riscv/include/vm.h.
+ *
+ * The array holds all zeroes when the CPU supports no mode bits, e.g. when it
+ * lacks the Svpbmt extension, so ORing an entry into a PTE is then a no-op.
+ */
+static __read_frequently pt_entry_t memattr_bits[VM_MEMATTR_TOTAL];
+static __read_frequently pt_entry_t memattr_mask;
+
+static __inline pt_entry_t
+pmap_memattr_bits(vm_memattr_t mode)
+{
+ KASSERT(pmap_is_valid_memattr(kernel_pmap, mode),
+ ("invalid memory mode %u\n", mode));
+ return (memattr_bits[(int)mode]);
+}
+
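Because memattr_bits[] stays all-zero when Svpbmt is absent, callers can OR the lookup result into a PTE unconditionally; the mapping paths changed below all follow this pattern (shown in isolation for illustration):

	entry = PTE_KERN;
	entry |= pmap_memattr_bits(VM_MEMATTR_DEFAULT); /* no-op without Svpbmt */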
/*
* This should only be used during pmap bootstrap e.g. by
* pmap_create_pagetables().
@@ -568,6 +589,7 @@
vm_offset_t va;
vm_paddr_t min_pa, max_pa, pa, endpa;
pd_entry_t *l2;
+ pt_entry_t memattr;
u_int l1slot, l2slot;
int physmap_idx;
@@ -583,6 +605,8 @@
dmap_phys_base = rounddown(min_pa, L1_SIZE);
dmap_phys_max = max_pa;
+ memattr = pmap_memattr_bits(VM_MEMATTR_DEFAULT);
+
/* Walk the physmap table. */
l2 = NULL;
l1slot = Ln_ENTRIES; /* sentinel value */
@@ -611,7 +635,7 @@
/* map l2 pages */
l2slot = pmap_l2_index(va);
- pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+ pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN | memattr));
pa += L2_SIZE;
va += L2_SIZE;
@@ -621,7 +645,7 @@
while (pa + L1_SIZE - 1 < endpa) {
/* map l1 pages */
l1slot = pmap_l1_index(va);
- pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN));
+ pmap_store(&l1[l1slot], L1_PTE(pa, PTE_KERN | memattr));
pa += L1_SIZE;
va += L1_SIZE;
@@ -641,7 +665,7 @@
/* map l2 pages */
l2slot = pmap_l2_index(va);
- pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN));
+ pmap_store(&l2[l2slot], L2_PTE(pa, PTE_KERN | memattr));
pa += L2_SIZE;
va += L2_SIZE;
@@ -673,6 +697,7 @@
vm_paddr_t *root_pt_phys)
{
pt_entry_t *l0, *l1, *kern_l2, *kern_l3, *devmap_l3;
+ pt_entry_t memattr;
pd_entry_t *devmap_l2;
vm_paddr_t kernend, freemempos, pa;
int nkernl2, nkernl3, ndevmapl3;
@@ -745,6 +770,9 @@
if (freemempos < roundup2(kernend, L2_SIZE))
freemempos = roundup2(kernend, L2_SIZE);
+ /* Memory attributes for standard/main memory. */
+ memattr = pmap_memattr_bits(VM_MEMATTR_DEFAULT);
+
/*
* Map the kernel (and preloaded modules or data) using L2 superpages.
*
@@ -757,7 +785,8 @@
*/
slot = pmap_l2_index(KERNBASE);
for (pa = kernstart; pa < kernend; pa += L2_SIZE, slot++) {
- pmap_store(&kern_l2[slot], L2_PTE(pa, PTE_KERN | PTE_X));
+ pmap_store(&kern_l2[slot],
+ L2_PTE(pa, PTE_KERN | PTE_X | memattr));
}
/*
@@ -830,6 +859,16 @@
*/
CPU_SET(PCPU_GET(hart), &kernel_pmap->pm_active);
+ /*
+ * Set up the memory attribute bits.
+ */
+ if (has_svpbmt) {
+ memattr_bits[VM_MEMATTR_PMA] = PTE_MA_NONE;
+ memattr_bits[VM_MEMATTR_UNCACHEABLE] = PTE_MA_NC;
+ memattr_bits[VM_MEMATTR_DEVICE] = PTE_MA_IO;
+ memattr_mask = PTE_MA_MASK;
+ }
+
/* Create a new set of pagetables to run the kernel in. */
freemempos = pmap_create_pagetables(kernstart, kernlen, &root_pt_phys);
@@ -862,7 +901,8 @@
pte = pmap_l3(kernel_pmap, dpcpu);
KASSERT(pte != NULL, ("Bootstrap pages missing"));
for (i = 0; i < howmany(DPCPU_SIZE, PAGE_SIZE); i++)
- pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN));
+ pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN |
+ pmap_memattr_bits(VM_MEMATTR_DEFAULT)));
/* Now, it can be initialized. */
dpcpu_init((void *)dpcpu, 0);
@@ -875,7 +915,8 @@
pte = pmap_l3(kernel_pmap, msgbufpv);
KASSERT(pte != NULL, ("Bootstrap pages missing"));
for (i = 0; i < howmany(msgbufsize, PAGE_SIZE); i++)
- pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN));
+ pmap_store(&pte[i], L3_PTE(pa + ptoa(i), PTE_KERN |
+ pmap_memattr_bits(VM_MEMATTR_DEFAULT)));
#undef reserve_space
@@ -896,7 +937,7 @@
{
TAILQ_INIT(&m->md.pv_list);
- m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
+ m->md.pv_memattr = VM_MEMATTR_DEFAULT;
}
/*
@@ -1145,10 +1186,11 @@
***************************************************/
void
-pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode __unused)
+pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
pt_entry_t entry;
pt_entry_t *l3;
+ pt_entry_t memattr;
vm_offset_t va;
pn_t pn;
@@ -1159,6 +1201,7 @@
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter_device: Mapping is not page-sized"));
+ memattr = pmap_memattr_bits(mode);
va = sva;
while (size != 0) {
l3 = pmap_l3(kernel_pmap, va);
@@ -1166,6 +1209,7 @@
pn = (pa / PAGE_SIZE);
entry = PTE_KERN;
+ entry |= memattr;
entry |= (pn << PTE_PPN0_S);
pmap_store(l3, entry);
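With the mode argument now honored, callers supply the attribute explicitly; an illustrative call with placeholder va/pa values:

	/* Illustrative: map a two-page device region uncached and strongly ordered. */
	pmap_kenter(va, 2 * PAGE_SIZE, pa, VM_MEMATTR_DEVICE);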
@@ -1253,7 +1297,8 @@
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
- pt_entry_t *l3, pa;
+ pt_entry_t *l3;
+ vm_paddr_t pa;
vm_offset_t va;
vm_page_t m;
pt_entry_t entry;
@@ -1268,6 +1313,7 @@
l3 = pmap_l3(kernel_pmap, va);
entry = PTE_KERN;
+ entry |= pmap_memattr_bits(m->md.pv_memattr);
entry |= (pn << PTE_PPN0_S);
pmap_store(l3, entry);
@@ -3136,6 +3182,7 @@
new_l3 |= (pn << PTE_PPN0_S);
if ((flags & PMAP_ENTER_WIRED) != 0)
new_l3 |= PTE_SW_WIRED;
+ new_l3 |= pmap_memattr_bits(m->md.pv_memattr);
/*
* Set modified bit gratuitously for writeable mappings if
@@ -3371,7 +3418,8 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE;
- new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V);
+ new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V |
+ pmap_memattr_bits(m->md.pv_memattr));
if ((m->oflags & VPO_UNMANAGED) == 0)
new_l2 |= PTE_SW_MANAGED;
if ((prot & VM_PROT_EXECUTE) != 0)
@@ -3706,7 +3754,7 @@
pmap_resident_count_inc(pmap, 1);
newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) |
- PTE_V | PTE_R;
+ PTE_V | PTE_R | pmap_memattr_bits(m->md.pv_memattr);
if ((prot & VM_PROT_EXECUTE) != 0)
newl3 |= PTE_X;
if ((m->oflags & VPO_UNMANAGED) == 0)
@@ -4804,9 +4852,13 @@
pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
{
vm_offset_t base, offset, tmpva;
+ vm_paddr_t phys;
pd_entry_t *l1, l1e;
pd_entry_t *l2, l2e;
pt_entry_t *l3, l3e;
+ pt_entry_t bits, mask;
+ bool anychanged = false;
+ int error = 0;
PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
base = trunc_page(va);
@@ -4817,46 +4869,155 @@
!(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
return (EINVAL);
+ bits = pmap_memattr_bits(mode);
+ mask = memattr_mask;
+
+ /* First loop: perform PTE validation and demotions as necessary. */
for (tmpva = base; tmpva < base + size; ) {
l1 = pmap_l1(kernel_pmap, tmpva);
if (l1 == NULL || ((l1e = pmap_load(l1)) & PTE_V) == 0)
return (EINVAL);
if ((l1e & PTE_RWX) != 0) {
/*
- * TODO: Demote if attributes don't match and there
- * isn't an L1 page left in the range, and update the
- * L1 entry if the attributes don't match but there is
- * an L1 page left in the range, once we support the
- * upcoming Svpbmt extension.
+ * If the existing PTE has the correct attributes, then
+ * no need to demote.
*/
- tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
- continue;
+ if ((l1e & mask) == bits) {
+ tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
+ continue;
+ }
+
+ /*
+ * If the 1GB page fits in the remaining range, we
+ * don't need to demote.
+ */
+ if ((tmpva & L1_OFFSET) == 0 &&
+ tmpva + L1_SIZE <= base + size) {
+ tmpva += L1_SIZE;
+ continue;
+ }
+
+ if (!pmap_demote_l1(kernel_pmap, l1, tmpva))
+ return (EINVAL);
}
l2 = pmap_l1_to_l2(l1, tmpva);
if (((l2e = pmap_load(l2)) & PTE_V) == 0)
return (EINVAL);
if ((l2e & PTE_RWX) != 0) {
/*
- * TODO: Demote if attributes don't match and there
- * isn't an L2 page left in the range, and update the
- * L2 entry if the attributes don't match but there is
- * an L2 page left in the range, once we support the
- * upcoming Svpbmt extension.
+ * If the existing PTE has the correct attributes, then
+ * no need to demote.
*/
- tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
- continue;
+ if ((l2e & mask) == bits) {
+ tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
+ continue;
+ }
+
+ /*
+ * If the 2MB page fits in the remaining range, we
+ * don't need to demote.
+ */
+ if ((tmpva & L2_OFFSET) == 0 &&
+ tmpva + L2_SIZE <= base + size) {
+ tmpva += L2_SIZE;
+ continue;
+ }
+
+ if (!pmap_demote_l2(kernel_pmap, l2, tmpva))
+ panic("l2 demotion failed");
}
l3 = pmap_l2_to_l3(l2, tmpva);
if (((l3e = pmap_load(l3)) & PTE_V) == 0)
return (EINVAL);
- /*
- * TODO: Update the L3 entry if the attributes don't match once
- * we support the upcoming Svpbmt extension.
- */
+
+ tmpva += PAGE_SIZE;
+ }
+
+ /* Second loop: perform PTE updates. */
+ for (tmpva = base; tmpva < base + size; ) {
+ l1 = pmap_l1(kernel_pmap, tmpva);
+ l1e = pmap_load(l1);
+ if ((l1e & PTE_RWX) != 0) {
+ /* Unchanged. */
+ if ((l1e & mask) == bits) {
+ tmpva += L1_SIZE;
+ continue;
+ }
+
+ l1e &= ~mask;
+ l1e |= bits;
+ pmap_store(l1, l1e);
+ anychanged = true;
+
+ /* Update corresponding DMAP entry. */
+ phys = L1PTE_TO_PHYS(l1e);
+ if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+ error = pmap_change_attr_locked(
+ PHYS_TO_DMAP(phys), L1_SIZE, mode);
+ if (error != 0)
+ break;
+ }
+ tmpva += L1_SIZE;
+ continue;
+ }
+
+ l2 = pmap_l1_to_l2(l1, tmpva);
+ l2e = pmap_load(l2);
+ if ((l2e & PTE_RWX) != 0) {
+ /* Unchanged. */
+ if ((l2e & mask) == bits) {
+ tmpva += L2_SIZE;
+ continue;
+ }
+
+ l2e &= ~mask;
+ l2e |= bits;
+ pmap_store(l2, l2e);
+ anychanged = true;
+
+ /* Update corresponding DMAP entry. */
+ phys = L2PTE_TO_PHYS(l2e);
+ if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+ error = pmap_change_attr_locked(
+ PHYS_TO_DMAP(phys), L2_SIZE, mode);
+ if (error != 0)
+ break;
+ }
+ tmpva += L2_SIZE;
+ continue;
+ }
+
+ l3 = pmap_l2_to_l3(l2, tmpva);
+ l3e = pmap_load(l3);
+
+ /* Unchanged. */
+ if ((l3e & mask) == bits) {
+ tmpva += PAGE_SIZE;
+ continue;
+ }
+
+ l3e &= ~mask;
+ l3e |= bits;
+ pmap_store(l3, l3e);
+ anychanged = true;
+
+ phys = PTE_TO_PHYS(l3e);
+ if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(phys)) {
+ error = pmap_change_attr_locked(PHYS_TO_DMAP(phys),
+ L3_SIZE, mode);
+ if (error != 0)
+ break;
+ }
tmpva += PAGE_SIZE;
}
- return (0);
+ if (anychanged) {
+ pmap_invalidate_range(kernel_pmap, base, tmpva);
+ if (mode == VM_MEMATTR_UNCACHEABLE)
+ cpu_dcache_wbinv_range((void *)base, size);
+ }
+
+ return (error);
}
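The locked helper above backs pmap_change_attr(); a typical caller, sketched with placeholder kva/len values:

	/* Illustrative: downgrade an existing kernel mapping to non-cacheable. */
	error = pmap_change_attr(kva, len, VM_MEMATTR_UNCACHEABLE);
	if (error != 0)
		printf("pmap_change_attr failed: %d\n", error);

Note the recursion on the direct map: changing the attributes of a kernel mapping also rewrites the corresponding DMAP entries, so the two views of a page never disagree on cacheability.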
/*
@@ -5093,7 +5254,7 @@
pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
{
- return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK);
+ return (mode >= VM_MEMATTR_DEFAULT && mode <= VM_MEMATTR_LAST);
}
bool
@@ -5149,17 +5310,38 @@
sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
vm_offset_t eva)
{
+ const char *mode;
+ int i;
if (eva <= range->sva)
return;
- sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %d %d %d\n",
+ for (i = 0; i < nitems(memattr_bits); i++)
+ if ((range->attrs & memattr_mask) == memattr_bits[i])
+ break;
+
+ switch (i) {
+ case VM_MEMATTR_PMA:
+ mode = "PMA";
+ break;
+ case VM_MEMATTR_UNCACHEABLE:
+ mode = "NC ";
+ break;
+ case VM_MEMATTR_DEVICE:
+ mode = "IO ";
+ break;
+ default:
+ mode = "???";
+ break;
+ }
+
+ sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %s %d %d %d\n",
range->sva, eva,
(range->attrs & PTE_W) == PTE_W ? 'w' : '-',
(range->attrs & PTE_X) == PTE_X ? 'x' : '-',
(range->attrs & PTE_U) == PTE_U ? 'u' : 's',
(range->attrs & PTE_G) == PTE_G ? 'g' : '-',
- range->l1pages, range->l2pages, range->l3pages);
+ mode, range->l1pages, range->l2pages, range->l3pages);
/* Reset to sentinel value. */
range->sva = 0xfffffffffffffffful;
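With the new column in place, one line of the vm.pmap.kernel_maps dump (read with "sysctl vm.pmap.kernel_maps") would look like this, addresses and page counts being illustrative:

	0xffffffd000000000-0xffffffd000200000 rw-sg PMA 0 1 0

that is, a writable, non-executable, supervisor-only, global range backed by a single 2MB L2 superpage with the default PMA attribute.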
@@ -5199,14 +5381,19 @@
/* The PTE global bit is inherited by lower levels. */
attrs = l1e & PTE_G;
- if ((l1e & PTE_RWX) != 0)
+ if ((l1e & PTE_RWX) != 0) {
attrs |= l1e & (PTE_RWX | PTE_U);
- else if (l2e != 0)
+ attrs |= l1e & memattr_mask;
+ } else if (l2e != 0)
attrs |= l2e & PTE_G;
- if ((l2e & PTE_RWX) != 0)
+
+ if ((l2e & PTE_RWX) != 0) {
attrs |= l2e & (PTE_RWX | PTE_U);
- else if (l3e != 0)
+ attrs |= l2e & memattr_mask;
+ } else if (l3e != 0) {
attrs |= l3e & (PTE_RWX | PTE_U | PTE_G);
+ attrs |= l3e & memattr_mask;
+ }
if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
sysctl_kmaps_dump(sb, range, va);
