D23067.diff

Index: head/sys/kern/vfs_subr.c
===================================================================
--- head/sys/kern/vfs_subr.c
+++ head/sys/kern/vfs_subr.c
@@ -166,6 +166,7 @@
*/
static TAILQ_HEAD(freelst, vnode) vnode_list;
static struct vnode *vnode_list_free_marker;
+static struct vnode *vnode_list_reclaim_marker;
/*
* "Free" vnode target. Free vnodes are rarely completely free, but are
@@ -653,6 +654,8 @@
mtx_unlock(&vnode_list_mtx);
vnode_list_free_marker = vn_alloc_marker(NULL);
TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist);
+ vnode_list_reclaim_marker = vn_alloc_marker(NULL);
+ TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
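
For context, the free and reclaim markers allocated above are placeholder vnodes (v_type == VMARKER): they hold a slot in the global vnode_list purely so a scan can remember its position while vnode_list_mtx is dropped. A minimal sketch of that lifecycle, assuming vn_free_marker() is the release counterpart of the vn_alloc_marker() helper used above; the extra cursor and both functions below are illustrative only, not part of this change:

/* Sketch only; symbols as declared in vfs_subr.c above and sys/vnode.h. */
static struct vnode *scan_marker;       /* hypothetical extra cursor */

static void
scan_marker_init(void)
{
        /* A marker is not a real vnode; it only occupies a list slot. */
        scan_marker = vn_alloc_marker(NULL);
        mtx_lock(&vnode_list_mtx);
        TAILQ_INSERT_HEAD(&vnode_list, scan_marker, v_vnodelist);
        mtx_unlock(&vnode_list_mtx);
}

static void
scan_marker_fini(void)
{
        mtx_lock(&vnode_list_mtx);
        TAILQ_REMOVE(&vnode_list, scan_marker, v_vnodelist);
        mtx_unlock(&vnode_list_mtx);
        vn_free_marker(scan_marker);    /* assumed counterpart to vn_alloc_marker() */
}
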
@@ -1057,6 +1060,17 @@
}
/*
+ * Try to reduce the total number of vnodes.
+ *
+ * This routine (and its user) are buggy in at least the following ways:
+ * - all parameters were picked years ago when RAM sizes were significantly
+ * smaller
+ * - it can pick vnodes based on pages used by the vm object, but filesystems
+ * like ZFS don't use it making the pick broken
+ * - since ZFS has its own aging policy it gets partially combated by this one
+ * - a dedicated method should be provided for filesystems to let them decide
+ * whether the vnode should be recycled
+ *
* This routine is called when we have too many vnodes. It attempts
* to free <count> vnodes and will potentially free vnodes that still
* have VM backing store (VM backing store is typically the cause
@@ -1071,118 +1085,116 @@
* number of vnodes to reach some minimum value regardless of what
* you set kern.maxvnodes to. Do not set kern.maxvnodes too low.
*
- * @param mp Try to reclaim vnodes from this mountpoint
* @param reclaim_nc_src Only reclaim directories with outgoing namecache
* entries if this argument is true
* @param trigger Only reclaim vnodes with fewer than this many resident
* pages.
+ * @param target How many vnodes to reclaim.
* @return The number of vnodes that were reclaimed.
*/
static int
-vlrureclaim(struct mount *mp, bool reclaim_nc_src, int trigger)
+vlrureclaim(bool reclaim_nc_src, int trigger, u_long target)
{
- struct vnode *vp;
- int count, done, target;
+ struct vnode *vp, *mvp;
+ struct mount *mp;
+ u_long done;
+ bool retried;
+ mtx_assert(&vnode_list_mtx, MA_OWNED);
+
+ retried = false;
done = 0;
- vn_start_write(NULL, &mp, V_WAIT);
- MNT_ILOCK(mp);
- count = mp->mnt_nvnodelistsize;
- target = count * (int64_t)gapvnodes / imax(desiredvnodes, 1);
- target = target / 10 + 1;
- while (count != 0 && done < target) {
- vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
- while (vp != NULL && vp->v_type == VMARKER)
- vp = TAILQ_NEXT(vp, v_nmntvnodes);
- if (vp == NULL)
+
+ mvp = vnode_list_reclaim_marker;
+restart:
+ vp = mvp;
+ while (done < target) {
+ vp = TAILQ_NEXT(vp, v_vnodelist);
+ if (__predict_false(vp == NULL))
break;
+
+ if (__predict_false(vp->v_type == VMARKER))
+ continue;
+
/*
- * XXX LRU is completely broken for non-free vnodes. First
- * by calling here in mountpoint order, then by moving
- * unselected vnodes to the end here, and most grossly by
- * removing the vlruvp() function that was supposed to
- * maintain the order. (This function was born broken
- * since syncer problems prevented it doing anything.) The
- * order is closer to LRC (C = Created).
- *
- * LRU reclaiming of vnodes seems to have last worked in
- * FreeBSD-3 where LRU wasn't mentioned under any spelling.
- * Then there was no hold count, and inactive vnodes were
- * simply put on the free list in LRU order. The separate
- * lists also break LRU. We prefer to reclaim from the
- * free list for technical reasons. This tends to thrash
- * the free list to keep very unrecently used held vnodes.
- * The problem is mitigated by keeping the free list large.
- */
- TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
- TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
- --count;
- if (!VI_TRYLOCK(vp))
- goto next_iter;
- /*
* If it's been deconstructed already, it's still
* referenced, or it exceeds the trigger, skip it.
* Also skip free vnodes. We are trying to make space
* to expand the free list, not reduce it.
*/
- if (vp->v_usecount ||
+ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 ||
+ (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)))
+ goto next_iter;
+
+ if (vp->v_type == VBAD || vp->v_type == VNON)
+ goto next_iter;
+
+ if (!VI_TRYLOCK(vp))
+ goto next_iter;
+
+ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 ||
(!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
- vp->v_holdcnt == 0 ||
- VN_IS_DOOMED(vp) || (vp->v_object != NULL &&
+ vp->v_type == VBAD || vp->v_type == VNON ||
+ (vp->v_object != NULL &&
vp->v_object->resident_page_count > trigger)) {
VI_UNLOCK(vp);
goto next_iter;
}
- MNT_IUNLOCK(mp);
vholdl(vp);
- if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) {
+ VI_UNLOCK(vp);
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
+ mtx_unlock(&vnode_list_mtx);
+
+ if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
vdrop(vp);
- goto next_iter_mntunlocked;
+ goto next_iter_unlocked;
}
+ if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) {
+ vdrop(vp);
+ vn_finished_write(mp);
+ goto next_iter_unlocked;
+ }
+
VI_LOCK(vp);
- /*
- * v_usecount may have been bumped after VOP_LOCK() dropped
- * the vnode interlock and before it was locked again.
- *
- * It is not necessary to recheck VIRF_DOOMED because it can
- * only be set by another thread that holds both the vnode
- * lock and vnode interlock. If another thread has the
- * vnode lock before we get to VOP_LOCK() and obtains the
- * vnode interlock after VOP_LOCK() drops the vnode
- * interlock, the other thread will be unable to drop the
- * vnode lock before our VOP_LOCK() call fails.
- */
- if (vp->v_usecount ||
+ if (vp->v_usecount > 0 ||
(!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
(vp->v_object != NULL &&
vp->v_object->resident_page_count > trigger)) {
VOP_UNLOCK(vp);
vdropl(vp);
- goto next_iter_mntunlocked;
+ vn_finished_write(mp);
+ goto next_iter_unlocked;
}
- KASSERT(!VN_IS_DOOMED(vp),
- ("VIRF_DOOMED unexpectedly detected in vlrureclaim()"));
counter_u64_add(recycles_count, 1);
vgonel(vp);
VOP_UNLOCK(vp);
vdropl(vp);
+ vn_finished_write(mp);
done++;
-next_iter_mntunlocked:
- if (!should_yield())
- goto relock_mnt;
- goto yield;
+next_iter_unlocked:
+ if (should_yield())
+ kern_yield(PRI_USER);
+ mtx_lock(&vnode_list_mtx);
+ goto restart;
next_iter:
+ MPASS(vp->v_type != VMARKER);
if (!should_yield())
continue;
- MNT_IUNLOCK(mp);
-yield:
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
+ mtx_unlock(&vnode_list_mtx);
kern_yield(PRI_USER);
-relock_mnt:
- MNT_ILOCK(mp);
+ mtx_lock(&vnode_list_mtx);
+ goto restart;
}
- MNT_IUNLOCK(mp);
- vn_finished_write(mp);
- return done;
+ if (done == 0 && !retried) {
+ TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+ TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+ retried = true;
+ goto restart;
+ }
+ return (done);
}
static int max_vnlru_free = 10000; /* limit on vnode free requests per call */
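
The structural change in the hunk above is that vlrureclaim() now walks the single global vnode_list behind its own marker instead of iterating mount points: the marker is parked right after the current candidate so the scan position survives dropping vnode_list_mtx for the expensive part (vn_start_write(), VOP_LOCK(), vgonel()), and the loop then restarts from the marker. A compressed, hedged restatement of that control flow follows; vlru_try_reclaim_one() is a hypothetical stand-in for the trigger checks and reclaim sequence shown above, and the yield/retry handling is elided.

/* Compressed restatement of the loop above; not a drop-in replacement. */
static u_long
vlru_scan_sketch(struct vnode *mvp, u_long target)
{
        struct vnode *vp;
        u_long done;

        mtx_assert(&vnode_list_mtx, MA_OWNED);
        done = 0;
        vp = mvp;                       /* mvp is vnode_list_reclaim_marker */
        while (done < target) {
                vp = TAILQ_NEXT(vp, v_vnodelist);
                if (vp == NULL)
                        break;
                if (vp->v_type == VMARKER)
                        continue;       /* someone else's cursor */
                if (!VI_TRYLOCK(vp))
                        continue;       /* the cheap candidate checks go here */
                vholdl(vp);             /* pin vp before the list lock is dropped */
                VI_UNLOCK(vp);
                /* Park the marker after the candidate so the position survives. */
                TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
                TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
                mtx_unlock(&vnode_list_mtx);
                done += vlru_try_reclaim_one(vp);       /* hypothetical; drops the hold */
                mtx_lock(&vnode_list_mtx);
                vp = mvp;               /* resume right after where we left off */
        }
        return (done);
}
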
@@ -1291,8 +1303,7 @@
static void
vnlru_proc(void)
{
- u_long rnumvnodes, rfreevnodes;
- struct mount *mp, *nmp;
+ u_long rnumvnodes, rfreevnodes, target;
unsigned long onumvnodes;
int done, force, trigger, usevnodes, vsp;
bool reclaim_nc_src;
@@ -1331,9 +1342,6 @@
PVFS|PDROP, "vlruwt", hz);
continue;
}
- mtx_unlock(&vnode_list_mtx);
- done = 0;
- rnumvnodes = atomic_load_long(&numvnodes);
rfreevnodes = atomic_load_long(&freevnodes);
onumvnodes = rnumvnodes;
@@ -1362,18 +1370,10 @@
if (force < 2)
trigger = vsmalltrigger;
reclaim_nc_src = force >= 3;
- mtx_lock(&mountlist_mtx);
- for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
- if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
- nmp = TAILQ_NEXT(mp, mnt_list);
- continue;
- }
- done += vlrureclaim(mp, reclaim_nc_src, trigger);
- mtx_lock(&mountlist_mtx);
- nmp = TAILQ_NEXT(mp, mnt_list);
- vfs_unbusy(mp);
- }
- mtx_unlock(&mountlist_mtx);
+ target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1);
+ target = target / 10 + 1;
+ done = vlrureclaim(reclaim_nc_src, trigger, target);
+ mtx_unlock(&vnode_list_mtx);
if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
uma_reclaim(UMA_RECLAIM_DRAIN);
if (done == 0) {
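
The per-mount target computation removed above now runs once in vnlru_proc() against the global vnode count: scale rnumvnodes by gapvnodes/desiredvnodes, take roughly a tenth of that, and add one so at least one vnode is always attempted. A worked example with made-up numbers (not defaults) follows; the real operands are the globals maintained elsewhere in this file.

/* Arithmetic sketch of the target computation above; example values only. */
static u_long
vlru_target_example(void)
{
        u_long rnumvnodes_ex = 400000;          /* current total vnodes (example) */
        int desiredvnodes_ex = 500000;          /* kern.maxvnodes (example) */
        int gapvnodes_ex = 100000;              /* gap vs. wantfreevnodes (example) */
        u_long target;

        target = rnumvnodes_ex * (int64_t)gapvnodes_ex / imax(desiredvnodes_ex, 1);
        /* 400000 * 100000 / 500000 = 80000 */
        target = target / 10 + 1;
        /* 80000 / 10 + 1 = 8001 vnodes to attempt this pass */
        return (target);
}
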
