Page Menu | Home | FreeBSD

D36916.diff
No One | Temporary

D36916.diff

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6771,19 +6771,36 @@
/*
* Examine the first PTE in the specified PTP. Abort if this PTE is
- * either invalid, unused, or does not map the first 4KB physical page
- * within a 2MB page.
+ * ineligible for promotion due to hardware errata, invalid, or does
+ * not map the first 4KB physical page within a 2MB page.
*/
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
- if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
- !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
- newpde))) {
+ if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
+ return;
+ if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}
+
+ /*
+ * Both here and in the below "for" loop, to allow for repromotion
+ * after MADV_FREE, conditionally write protect a clean PTE before
+ * possibly aborting the promotion due to other PTE attributes. Why?
+ * Suppose that MADV_FREE is applied to a part of a superpage, the
+ * address range [S, E). pmap_advise() will demote the superpage
+ * mapping, destroy the 4KB page mapping at the end of [S, E), and
+ * clear PG_M and PG_A in the PTEs for the rest of [S, E). Later,
+ * imagine that the memory in [S, E) is recycled, but the last 4KB
+ * page in [S, E) is not the last to be rewritten, or simply accessed.
+ * In other words, there is still a 4KB page in [S, E), call it P,
+ * that is writeable but PG_M and PG_A are clear in P's PTE. Unless
+ * we write protect P before aborting the promotion, if and when P is
+ * finally rewritten, there won't be a page fault to trigger
+ * repromotion.
+ */
setpde:
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
@@ -6794,16 +6811,22 @@
goto setpde;
newpde &= ~PG_RW;
}
+ if ((newpde & PG_A) == 0) {
+ counter_u64_add(pmap_pde_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
/*
* Examine each of the other PTEs in the specified PTP. Abort if this
* PTE maps an unexpected 4KB physical page or does not have identical
* characteristics to the first PTE.
*/
- pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+ pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
oldpte = *pte;
- if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+ if ((oldpte & (PG_FRAME | PG_V)) != pa) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3955,17 +3955,38 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
+ /*
+ * Examine the first L3E in the specified PTP. Abort if this L3E is
+ * ineligible for promotion, invalid, or does not map the first 4KB
+ * physical page within a 2MB page.
+ */
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
newl2 = pmap_load(firstl3);
-
- if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF ||
- (newl2 & ATTR_SW_NO_PROMOTE) != 0) {
+ if ((newl2 & ATTR_SW_NO_PROMOTE) != 0)
+ return;
+ if ((newl2 & ((~ATTR_MASK & L2_OFFSET) | ATTR_DESCR_MASK)) != L3_PAGE) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}
+ /*
+ * Both here and in the below "for" loop, to allow for repromotion
+ * after MADV_FREE, conditionally write protect a clean L3E before
+ * possibly aborting the promotion due to other L3E attributes. Why?
+ * Suppose that MADV_FREE is applied to a part of a superpage, the
+ * address range [S, E). pmap_advise() will demote the superpage
+ * mapping, destroy the 4KB page mapping at the end of [S, E), and
+ * set AP_RO and clear AF in the L3Es for the rest of [S, E). Later,
+ * imagine that the memory in [S, E) is recycled, but the last 4KB
+ * page in [S, E) is not the last to be rewritten, or simply accessed.
+ * In other words, there is still a 4KB page in [S, E), call it P,
+ * that is writeable but AP_RO is set and AF is clear in P's L3E.
+ * Unless we write protect P before aborting the promotion, if and
+ * when P is finally rewritten, there won't be a page fault to trigger
+ * repromotion.
+ */
setl2:
if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
@@ -3977,10 +3998,27 @@
goto setl2;
newl2 &= ~ATTR_SW_DBM;
}
+ if ((newl2 & ATTR_AF) == 0) {
+ atomic_add_long(&pmap_l2_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
- pa = newl2 + L2_SIZE - PAGE_SIZE;
+ /*
+ * Examine each of the other L3Es in the specified PTP. Abort if this
+ * L3E maps an unexpected 4KB physical page or does not have identical
+ * characteristics to the first L3E.
+ */
+ pa = (newl2 & (~ATTR_MASK | ATTR_DESCR_MASK)) + L2_SIZE - PAGE_SIZE;
for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
oldl3 = pmap_load(l3);
+ if ((oldl3 & (~ATTR_MASK | ATTR_DESCR_MASK)) != pa) {
+ atomic_add_long(&pmap_l2_p_failures, 1);
+ CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
+ " in pmap %p", va, pmap);
+ return;
+ }
setl3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
@@ -3994,7 +4032,7 @@
goto setl3;
oldl3 &= ~ATTR_SW_DBM;
}
- if (oldl3 != pa) {
+ if ((oldl3 & ATTR_MASK) != (newl2 & ATTR_MASK)) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
@@ -4033,7 +4071,7 @@
atomic_add_long(&pmap_l2_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
- pmap);
+ pmap);
}
#endif /* VM_NRESERVLEVEL > 0 */

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 28, 2:03 AM (10 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16237545
Default Alt Text
D36916.diff (6 KB)

Event Timeline