D23067: vfs: reimplement vlrureclaim to actually use LRU
D23067.diff (8 KB)
Index: head/sys/kern/vfs_subr.c
===================================================================
--- head/sys/kern/vfs_subr.c
+++ head/sys/kern/vfs_subr.c
@@ -166,6 +166,7 @@
*/
static TAILQ_HEAD(freelst, vnode) vnode_list;
static struct vnode *vnode_list_free_marker;
+static struct vnode *vnode_list_reclaim_marker;
/*
* "Free" vnode target. Free vnodes are rarely completely free, but are
@@ -653,6 +654,8 @@
mtx_unlock(&vnode_list_mtx);
vnode_list_free_marker = vn_alloc_marker(NULL);
TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist);
+ vnode_list_reclaim_marker = vn_alloc_marker(NULL);
+ TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
@@ -1057,6 +1060,17 @@
}
/*
+ * Try to reduce the total number of vnodes.
+ *
+ * This routine (and its user) are buggy in at least the following ways:
+ * - all parameters were picked years ago when RAM sizes were significantly
+ * smaller
+ * - it can pick vnodes based on pages used by the vm object, but filesystems
+ * like ZFS don't use it making the pick broken
+ * - since ZFS has its own aging policy it gets partially combated by this one
+ * - a dedicated method should be provided for filesystems to let them decide
+ * whether the vnode should be recycled
+ *
* This routine is called when we have too many vnodes. It attempts
* to free <count> vnodes and will potentially free vnodes that still
* have VM backing store (VM backing store is typically the cause
@@ -1071,118 +1085,116 @@
* number of vnodes to reach some minimum value regardless of what
* you set kern.maxvnodes to. Do not set kern.maxvnodes too low.
*
- * @param mp Try to reclaim vnodes from this mountpoint
* @param reclaim_nc_src Only reclaim directories with outgoing namecache
* entries if this argument is strue
* @param trigger Only reclaim vnodes with fewer than this many resident
* pages.
+ * @param target How many vnodes to reclaim.
* @return The number of vnodes that were reclaimed.
*/
static int
-vlrureclaim(struct mount *mp, bool reclaim_nc_src, int trigger)
+vlrureclaim(bool reclaim_nc_src, int trigger, u_long target)
{
- struct vnode *vp;
- int count, done, target;
+ struct vnode *vp, *mvp;
+ struct mount *mp;
+ u_long done;
+ bool retried;
+ mtx_assert(&vnode_list_mtx, MA_OWNED);
+
+ retried = false;
done = 0;
- vn_start_write(NULL, &mp, V_WAIT);
- MNT_ILOCK(mp);
- count = mp->mnt_nvnodelistsize;
- target = count * (int64_t)gapvnodes / imax(desiredvnodes, 1);
- target = target / 10 + 1;
- while (count != 0 && done < target) {
- vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
- while (vp != NULL && vp->v_type == VMARKER)
- vp = TAILQ_NEXT(vp, v_nmntvnodes);
- if (vp == NULL)
+
+ mvp = vnode_list_reclaim_marker;
+restart:
+ vp = mvp;
+ while (done < target) {
+ vp = TAILQ_NEXT(vp, v_vnodelist);
+ if (__predict_false(vp == NULL))
break;
+
+ if (__predict_false(vp->v_type == VMARKER))
+ continue;
+
/*
- * XXX LRU is completely broken for non-free vnodes. First
- * by calling here in mountpoint order, then by moving
- * unselected vnodes to the end here, and most grossly by
- * removing the vlruvp() function that was supposed to
- * maintain the order. (This function was born broken
- * since syncer problems prevented it doing anything.) The
- * order is closer to LRC (C = Created).
- *
- * LRU reclaiming of vnodes seems to have last worked in
- * FreeBSD-3 where LRU wasn't mentioned under any spelling.
- * Then there was no hold count, and inactive vnodes were
- * simply put on the free list in LRU order. The separate
- * lists also break LRU. We prefer to reclaim from the
- * free list for technical reasons. This tends to thrash
- * the free list to keep very unrecently used held vnodes.
- * The problem is mitigated by keeping the free list large.
- */
- TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
- TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
- --count;
- if (!VI_TRYLOCK(vp))
- goto next_iter;
- /*
* If it's been deconstructed already, it's still
* referenced, or it exceeds the trigger, skip it.
* Also skip free vnodes. We are trying to make space
* to expand the free list, not reduce it.
*/
- if (vp->v_usecount ||
+ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 ||
+ (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)))
+ goto next_iter;
+
+ if (vp->v_type == VBAD || vp->v_type == VNON)
+ goto next_iter;
+
+ if (!VI_TRYLOCK(vp))
+ goto next_iter;
+
+ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 ||
(!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
- vp->v_holdcnt == 0 ||
- VN_IS_DOOMED(vp) || (vp->v_object != NULL &&
+ vp->v_type == VBAD || vp->v_type == VNON ||
+ (vp->v_object != NULL &&
vp->v_object->resident_page_count > trigger)) {
VI_UNLOCK(vp);
goto next_iter;
}
- MNT_IUNLOCK(mp);
vholdl(vp);
- if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) {
+ VI_UNLOCK(vp);
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
+ mtx_unlock(&vnode_list_mtx);
+
+ if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
vdrop(vp);
- goto next_iter_mntunlocked;
+ goto next_iter_unlocked;
}
+ if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) {
+ vdrop(vp);
+ vn_finished_write(mp);
+ goto next_iter_unlocked;
+ }
+
VI_LOCK(vp);
- /*
- * v_usecount may have been bumped after VOP_LOCK() dropped
- * the vnode interlock and before it was locked again.
- *
- * It is not necessary to recheck VIRF_DOOMED because it can
- * only be set by another thread that holds both the vnode
- * lock and vnode interlock. If another thread has the
- * vnode lock before we get to VOP_LOCK() and obtains the
- * vnode interlock after VOP_LOCK() drops the vnode
- * interlock, the other thread will be unable to drop the
- * vnode lock before our VOP_LOCK() call fails.
- */
- if (vp->v_usecount ||
+ if (vp->v_usecount > 0 ||
(!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
(vp->v_object != NULL &&
vp->v_object->resident_page_count > trigger)) {
VOP_UNLOCK(vp);
vdropl(vp);
- goto next_iter_mntunlocked;
+ vn_finished_write(mp);
+ goto next_iter_unlocked;
}
- KASSERT(!VN_IS_DOOMED(vp),
- ("VIRF_DOOMED unexpectedly detected in vlrureclaim()"));
counter_u64_add(recycles_count, 1);
vgonel(vp);
VOP_UNLOCK(vp);
vdropl(vp);
+ vn_finished_write(mp);
done++;
-next_iter_mntunlocked:
- if (!should_yield())
- goto relock_mnt;
- goto yield;
+next_iter_unlocked:
+ if (should_yield())
+ kern_yield(PRI_USER);
+ mtx_lock(&vnode_list_mtx);
+ goto restart;
next_iter:
+ MPASS(vp->v_type != VMARKER);
if (!should_yield())
continue;
- MNT_IUNLOCK(mp);
-yield:
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
+ mtx_unlock(&vnode_list_mtx);
kern_yield(PRI_USER);
-relock_mnt:
- MNT_ILOCK(mp);
+ mtx_lock(&vnode_list_mtx);
+ goto restart;
}
- MNT_IUNLOCK(mp);
- vn_finished_write(mp);
- return done;
+ if (done == 0 && !retried) {
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+ retried = true;
+ goto restart;
+ }
+ return (done);
}
static int max_vnlru_free = 10000; /* limit on vnode free requests per call */
@@ -1291,8 +1303,7 @@
static void
vnlru_proc(void)
{
- u_long rnumvnodes, rfreevnodes;
- struct mount *mp, *nmp;
+ u_long rnumvnodes, rfreevnodes, target;
unsigned long onumvnodes;
int done, force, trigger, usevnodes, vsp;
bool reclaim_nc_src;
@@ -1331,9 +1342,6 @@
PVFS|PDROP, "vlruwt", hz);
continue;
}
- mtx_unlock(&vnode_list_mtx);
- done = 0;
- rnumvnodes = atomic_load_long(&numvnodes);
rfreevnodes = atomic_load_long(&freevnodes);
onumvnodes = rnumvnodes;
@@ -1362,18 +1370,10 @@
if (force < 2)
trigger = vsmalltrigger;
reclaim_nc_src = force >= 3;
- mtx_lock(&mountlist_mtx);
- for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
- if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
- nmp = TAILQ_NEXT(mp, mnt_list);
- continue;
- }
- done += vlrureclaim(mp, reclaim_nc_src, trigger);
- mtx_lock(&mountlist_mtx);
- nmp = TAILQ_NEXT(mp, mnt_list);
- vfs_unbusy(mp);
- }
- mtx_unlock(&mountlist_mtx);
+ target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1);
+ target = target / 10 + 1;
+ done = vlrureclaim(reclaim_nc_src, trigger, target);
+ mtx_unlock(&vnode_list_mtx);
if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
uma_reclaim(UMA_RECLAIM_DRAIN);
if (done == 0) {
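
Note on the technique above: the rewritten vlrureclaim() drops the per-mountpoint scan and instead walks the single global vnode_list using a dedicated marker vnode (vnode_list_reclaim_marker). Before the list lock is released to do the expensive per-vnode work, the marker is moved directly after the candidate, so the scan can later resume from that position even though the list may have changed in the meantime. The following is a minimal userspace sketch of that marker pattern only, using <sys/queue.h> and a pthread mutex in place of the kernel TAILQ and vnode_list_mtx; struct node, is_marker, process() and the other names are illustrative, not taken from the diff.

/*
 * Userspace sketch (not the kernel code) of marker-based iteration over a
 * mutex-protected TAILQ, as used by the new vlrureclaim().
 */
#include <sys/queue.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	TAILQ_ENTRY(node) link;
	bool is_marker;		/* skipped by scans, like VMARKER vnodes */
	int id;
};

static TAILQ_HEAD(, node) list = TAILQ_HEAD_INITIALIZER(list);
static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the expensive per-element work done with the list lock dropped. */
static void
process(struct node *n)
{
	printf("processing node %d\n", n->id);
}

static void
scan(struct node *marker)
{
	struct node *n;

	pthread_mutex_lock(&list_mtx);
restart:
	n = marker;
	while ((n = TAILQ_NEXT(n, link)) != NULL) {
		if (n->is_marker)
			continue;
		/*
		 * Remember the position by moving the marker directly after
		 * the chosen element, then drop the lock for the real work.
		 */
		TAILQ_REMOVE(&list, marker, link);
		TAILQ_INSERT_AFTER(&list, n, marker, link);
		pthread_mutex_unlock(&list_mtx);

		process(n);

		pthread_mutex_lock(&list_mtx);
		/* The list may have changed; resume from the marker. */
		goto restart;
	}
	pthread_mutex_unlock(&list_mtx);
}

int
main(void)
{
	struct node marker = { .is_marker = true };
	struct node *n;
	int i;

	TAILQ_INSERT_HEAD(&list, &marker, link);
	for (i = 1; i <= 3; i++) {
		n = calloc(1, sizeof(*n));
		n->id = i;
		TAILQ_INSERT_TAIL(&list, n, link);
	}
	scan(&marker);
	return (0);
}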
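
The per-pass target that vnlru_proc() now hands to vlrureclaim() is shaped as target = rnumvnodes * gapvnodes / desiredvnodes, then divided by 10 plus 1. The snippet below is illustrative arithmetic only; the variable values are assumptions (including gapvnodes being roughly desiredvnodes minus the free-vnode want), not figures from the diff.

/* Hedged example of the reclaim target computation; values are assumed. */
#include <stdio.h>

int
main(void)
{
	long long rnumvnodes = 400000;		/* assumed current numvnodes */
	long long desiredvnodes = 500000;	/* assumed kern.maxvnodes */
	long long gapvnodes = 375000;		/* assumed desiredvnodes - wantfreevnodes */
	long long target;

	target = rnumvnodes * gapvnodes / (desiredvnodes > 1 ? desiredvnodes : 1);
	target = target / 10 + 1;
	/* Prints 30001: one pass tries to recycle about 7.5% of the vnodes. */
	printf("vlrureclaim target: %lld\n", target);
	return (0);
}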