Page MenuHomeFreeBSD

D31016.diff
No OneTemporary

D31016.diff

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -45,6 +45,8 @@
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
uint64_t nullm_flags;
+ struct mount_upper_node upper_node;
+ struct mount_upper_node notify_node;
};
#ifdef _KERNEL
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -163,7 +163,8 @@
* Save pointer to underlying FS and the reference to the
* lower root vnode.
*/
- xmp->nullm_vfs = vfs_pin_from_vp(lowerrootvp);
+ xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
+ &xmp->upper_node);
if (xmp->nullm_vfs == NULL) {
vput(lowerrootvp);
free(xmp, M_NULLFSMNT);
@@ -178,7 +179,7 @@
*/
error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
if (error != 0) {
- vfs_unpin(xmp->nullm_vfs);
+ vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
vrele(lowerrootvp);
free(xmp, M_NULLFSMNT);
return (error);
@@ -195,6 +196,11 @@
(xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) != 0)
xmp->nullm_flags &= ~NULLM_CACHE;
+ if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
+ vfs_register_for_notification(xmp->nullm_vfs, mp,
+ &xmp->notify_node);
+ }
+
MNT_ILOCK(mp);
if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
@@ -206,13 +212,6 @@
(MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
MNT_IUNLOCK(mp);
vfs_getnewfsid(mp);
- if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
- MNT_ILOCK(xmp->nullm_vfs);
- TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
- mnt_upper_link);
- MNT_IUNLOCK(xmp->nullm_vfs);
- }
-
vfs_mountedfrom(mp, target);
vput(nullm_rootvp);
@@ -230,7 +229,6 @@
int mntflags;
{
struct null_mount *mntdata;
- struct mount *ump;
int error, flags;
NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
@@ -259,17 +257,11 @@
* Finally, throw away the null_mount structure
*/
mntdata = mp->mnt_data;
- ump = mntdata->nullm_vfs;
if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
- MNT_ILOCK(ump);
- while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) {
- ump->mnt_kern_flag |= MNTK_VGONE_WAITER;
- msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0);
- }
- TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
- MNT_IUNLOCK(ump);
+ vfs_unregister_for_notification(mntdata->nullm_vfs,
+ &mntdata->notify_node);
}
- vfs_unpin(ump);
+ vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
vrele(mntdata->nullm_lowerrootvp);
mp->mnt_data = NULL;
free(mntdata, M_NULLFSMNT);
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -57,6 +57,8 @@
struct vnode *um_lowervp; /* VREFed once */
struct vnode *um_uppervp; /* VREFed once */
struct vnode *um_rootvp; /* ROOT vnode */
+ struct mount_upper_node um_lower_link; /* node in lower FS list of uppers */
+ struct mount_upper_node um_upper_link; /* node in upper FS list of uppers */
unionfs_copymode um_copymode;
unionfs_whitemode um_whitemode;
uid_t um_uid;
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -292,14 +292,16 @@
return (error);
}
- lowermp = vfs_pin_from_vp(ump->um_lowervp);
- uppermp = vfs_pin_from_vp(ump->um_uppervp);
+ lowermp = vfs_register_upper_from_vp(ump->um_lowervp, mp,
+ &ump->um_lower_link);
+ uppermp = vfs_register_upper_from_vp(ump->um_uppervp, mp,
+ &ump->um_upper_link);
if (lowermp == NULL || uppermp == NULL) {
if (lowermp != NULL)
- vfs_unpin(lowermp);
+ vfs_unregister_upper(lowermp, &ump->um_lower_link);
if (uppermp != NULL)
- vfs_unpin(uppermp);
+ vfs_unregister_upper(uppermp, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
mp->mnt_data = NULL;
return (ENOENT);
@@ -357,8 +359,8 @@
if (error)
return (error);
- vfs_unpin(ump->um_lowervp->v_mount);
- vfs_unpin(ump->um_uppervp->v_mount);
+ vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link);
+ vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
mp->mnt_data = NULL;
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -65,6 +65,7 @@
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
+#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <vm/uma.h>
@@ -89,6 +90,11 @@
SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
"Retry failed r/w mount as r/o if no explicit ro/rw option is specified");
+static bool recursive_forced_unmount = false;
+SYSCTL_BOOL(_vfs, OID_AUTO, recursive_forced_unmount, CTLFLAG_RW,
+ &recursive_forced_unmount, 0, "Recursively unmount stacked upper mounts"
+ " when a file system is forcibly unmounted");
+
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
static uma_zone_t mount_zone;
@@ -103,6 +109,16 @@
EVENTHANDLER_LIST_DEFINE(vfs_mounted);
EVENTHANDLER_LIST_DEFINE(vfs_unmounted);
+static void vfs_deferred_unmount(void *arg, int pending);
+static struct task deferred_unmount_task =
+ TASK_INITIALIZER(0, vfs_deferred_unmount, NULL); /* prio 0, no ctx */
+static struct mtx deferred_unmount_lock;
+MTX_SYSINIT(deferred_unmount, &deferred_unmount_lock, "deferred_unmount",
+ MTX_DEF);
+static STAILQ_HEAD(, mount) deferred_unmount_list =
+ STAILQ_HEAD_INITIALIZER(deferred_unmount_list);
+TASKQUEUE_DEFINE_THREAD(deferred_unmount);
+
static void mount_devctl_event(const char *type, struct mount *mp, bool donew);
/*
@@ -505,8 +521,21 @@
MNT_IUNLOCK(mp);
}
+/*
+ * Register ump as an upper mount of the mount associated with
+ * vnode vp. This registration will be tracked through
+ * mount_upper_node upper, which should be allocated by the
+ * caller and stored in per-mount data associated with ump.
+ *
+ * If successful, this function will return the mount associated
+ * with vp, and will ensure that it cannot be unmounted until
+ * ump has been unregistered as one of its upper mounts.
+ *
+ * Upon failure this function will return NULL.
+ */
struct mount *
-vfs_pin_from_vp(struct vnode *vp)
+vfs_register_upper_from_vp(struct vnode *vp, struct mount *ump,
+ struct mount_upper_node *upper)
{
struct mount *mp;
@@ -514,26 +543,81 @@
if (mp == NULL)
return (NULL);
MNT_ILOCK(mp);
- if (mp != vp->v_mount || (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
+ if (mp != vp->v_mount ||
+ ((mp->mnt_kern_flag & (MNTK_UNMOUNT | MNTK_RECURSE)) != 0)) {
MNT_IUNLOCK(mp);
return (NULL);
}
+ KASSERT(ump != mp, ("upper and lower mounts are identical"));
+ upper->mp = ump;
MNT_REF(mp);
- KASSERT(mp->mnt_pinned_count < INT_MAX,
- ("mount pinned count overflow"));
- ++mp->mnt_pinned_count;
+ TAILQ_INSERT_TAIL(&mp->mnt_uppers, upper, mnt_upper_link);
MNT_IUNLOCK(mp);
return (mp);
}
+/*
+ * Register upper mount ump to receive vnode unlink/reclaim
+ * notifications from lower mount mp. This registration will
+ * be tracked through mount_upper_node upper, which should be
+ * allocated by the caller and stored in per-mount data
+ * associated with ump.
+ *
+ * ump must already be registered as an upper mount of mp
+ * through a call to vfs_register_upper_from_vp().
+ */
void
-vfs_unpin(struct mount *mp)
+vfs_register_for_notification(struct mount *mp, struct mount *ump,
+ struct mount_upper_node *upper)
+{
+ upper->mp = ump;
+ MNT_ILOCK(mp);
+ TAILQ_INSERT_TAIL(&mp->mnt_notify, upper, mnt_upper_link);
+ MNT_IUNLOCK(mp);
+}
+
+static void
+vfs_drain_upper_locked(struct mount *mp)
+{
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+ while (mp->mnt_upper_pending != 0) {
+ mp->mnt_kern_flag |= MNTK_UPPER_WAITER;
+ msleep(&mp->mnt_uppers, MNT_MTX(mp), 0, "mntupw", 0);
+ }
+}
+
+/*
+ * Undo a previous call to vfs_register_for_notification().
+ * The mount represented by upper must be currently registered
+ * as an upper mount for mp.
+ */
+void
+vfs_unregister_for_notification(struct mount *mp,
+ struct mount_upper_node *upper)
+{
+ MNT_ILOCK(mp);
+ vfs_drain_upper_locked(mp);
+ TAILQ_REMOVE(&mp->mnt_notify, upper, mnt_upper_link);
+ MNT_IUNLOCK(mp);
+}
+
+/*
+ * Undo a previous call to vfs_register_upper_from_vp().
+ * This must be done before mp can be unmounted.
+ */
+void
+vfs_unregister_upper(struct mount *mp, struct mount_upper_node *upper)
{
MNT_ILOCK(mp);
- KASSERT(mp->mnt_pinned_count > 0, ("mount pinned count underflow"));
KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0,
- ("mount pinned with pending unmount"));
- --mp->mnt_pinned_count;
+ ("registered upper with pending unmount"));
+ vfs_drain_upper_locked(mp);
+ TAILQ_REMOVE(&mp->mnt_uppers, upper, mnt_upper_link);
+ if ((mp->mnt_kern_flag & MNTK_TASKQUEUE_WAITER) != 0 &&
+ TAILQ_EMPTY(&mp->mnt_uppers)) {
+ mp->mnt_kern_flag &= ~MNTK_TASKQUEUE_WAITER;
+ wakeup(&mp->mnt_taskqueue_link);
+ }
MNT_REL(mp);
MNT_IUNLOCK(mp);
}
@@ -600,8 +684,10 @@
mac_mount_create(cred, mp);
#endif
arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
+ mp->mnt_upper_pending = 0;
TAILQ_INIT(&mp->mnt_uppers);
- mp->mnt_pinned_count = 0;
+ TAILQ_INIT(&mp->mnt_notify);
+ mp->mnt_taskqueue_flags = 0;
return (mp);
}
@@ -640,9 +726,9 @@
vn_printf(vp, "dangling vnode ");
panic("unmount: dangling vnode");
}
- KASSERT(mp->mnt_pinned_count == 0,
- ("mnt_pinned_count = %d", mp->mnt_pinned_count));
+ KASSERT(mp->mnt_upper_pending == 0, ("mnt_upper_pending"));
KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
+ KASSERT(TAILQ_EMPTY(&mp->mnt_notify), ("mnt_notify"));
if (mp->mnt_nvnodelistsize != 0)
panic("vfs_mount_destroy: nonzero nvnodelistsize");
if (mp->mnt_lazyvnodelistsize != 0)
@@ -1799,17 +1885,166 @@
return (sum);
}
+static bool
+deferred_unmount_enqueue(struct mount *mp, uint64_t flags, bool requeue)
+{
+ bool enqueued;
+
+ enqueued = false;
+ mtx_lock(&deferred_unmount_lock);
+ if ((mp->mnt_taskqueue_flags & MNT_DEFERRED) == 0 || requeue) {
+ mp->mnt_taskqueue_flags = flags | MNT_DEFERRED;
+ STAILQ_INSERT_TAIL(&deferred_unmount_list, mp,
+ mnt_taskqueue_link);
+ enqueued = true;
+ }
+ mtx_unlock(&deferred_unmount_lock);
+
+ if (enqueued) {
+ taskqueue_enqueue(taskqueue_deferred_unmount,
+ &deferred_unmount_task);
+ }
+
+ return (enqueued);
+}
+
+/*
+ * Taskqueue handler for processing async/recursive unmounts
+ */
+static void
+vfs_deferred_unmount(void *argi __unused, int pending __unused)
+{
+ STAILQ_HEAD(, mount) local_unmounts;
+ uint64_t flags;
+ struct mount *mp, *tmp;
+ bool unmounted;
+
+ STAILQ_INIT(&local_unmounts);
+ mtx_lock(&deferred_unmount_lock);
+ STAILQ_CONCAT(&local_unmounts, &deferred_unmount_list);
+ mtx_unlock(&deferred_unmount_lock);
+
+ STAILQ_FOREACH_SAFE(mp, &local_unmounts, mnt_taskqueue_link, tmp) {
+ flags = mp->mnt_taskqueue_flags;
+ KASSERT((flags & MNT_DEFERRED) != 0,
+ ("taskqueue unmount without MNT_DEFERRED"));
+ if (dounmount(mp, flags, curthread) != 0) {
+ MNT_ILOCK(mp);
+ unmounted = ((mp->mnt_kern_flag & MNTK_REFEXPIRE) != 0);
+ MNT_IUNLOCK(mp);
+ if (!unmounted)
+ deferred_unmount_enqueue(mp, flags, true);
+ else
+ vfs_rel(mp);
+ }
+ }
+}
+
/*
* Do the actual filesystem unmount.
*/
int
-dounmount(struct mount *mp, int flags, struct thread *td)
+dounmount(struct mount *mp, uint64_t flags, struct thread *td)
{
+ struct mount_upper_node *upper;
struct vnode *coveredvp, *rootvp;
int error;
uint64_t async_flag;
int mnt_gen_r;
+ KASSERT((flags & MNT_DEFERRED) == 0 ||
+ (flags & (MNT_RECURSE | MNT_FORCE)) == (MNT_RECURSE | MNT_FORCE),
+ ("MNT_DEFERRED requires MNT_RECURSE | MNT_FORCE"));
+
+ /*
+ * If the caller has explicitly requested the unmount to be handled by
+ * the taskqueue and we're not already in taskqueue context, queue
+ * up the unmount request and exit. This is done prior to any
+ * credential checks; MNT_DEFERRED should be used only for kernel-
+ * initiated unmounts and will therefore be processed with the
+ * (kernel) credentials of the taskqueue thread. Still, callers
+ * should be sure this is the behavior they want.
+ */
+ if ((flags & MNT_DEFERRED) != 0 &&
+ taskqueue_member(taskqueue_deferred_unmount, curthread) == 0) {
+ if (!deferred_unmount_enqueue(mp, flags, false))
+ vfs_rel(mp);
+ return (EINPROGRESS);
+ }
+
+ /*
+ * Only privileged root, or (if MNT_USER is set) the user that did the
+ * original mount is permitted to unmount this filesystem.
+ * This check should be made prior to queueing up any recursive
+ * unmounts of upper filesystems. Those unmounts will be executed
+ * with kernel thread credentials and are expected to succeed, so
+ * we must at least ensure the originating context has sufficient
+ * privilege to unmount the base filesystem before proceeding with
+ * the uppers.
+ */
+ error = vfs_suser(mp, td);
+ if (error != 0) {
+ KASSERT((flags & MNT_DEFERRED) == 0,
+ ("taskqueue unmount with insufficient privilege"));
+ vfs_rel(mp);
+ return (error);
+ }
+
+ if (recursive_forced_unmount && ((flags & MNT_FORCE) != 0))
+ flags |= MNT_RECURSE;
+
+ if ((flags & MNT_RECURSE) != 0) {
+ KASSERT((flags & MNT_FORCE) != 0,
+ ("MNT_RECURSE requires MNT_FORCE"));
+
+ MNT_ILOCK(mp);
+ /*
+ * Set MNTK_RECURSE to prevent new upper mounts from being
+ * added, and note that an operation on the uppers list is in
+ * progress. This will ensure that unregistration from the
+ * uppers list, and therefore any pending unmount of the upper
+ * FS, can't complete until after we finish walking the list.
+ */
+ mp->mnt_kern_flag |= MNTK_RECURSE;
+ mp->mnt_upper_pending++;
+ TAILQ_FOREACH(upper, &mp->mnt_uppers, mnt_upper_link) {
+ MNT_IUNLOCK(mp);
+ vfs_ref(upper->mp);
+ if (!deferred_unmount_enqueue(upper->mp, flags, false))
+ vfs_rel(upper->mp);
+ MNT_ILOCK(mp);
+ }
+ mp->mnt_upper_pending--;
+ if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 &&
+ mp->mnt_upper_pending == 0) {
+ mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER;
+ wakeup(&mp->mnt_uppers);
+ }
+ /*
+ * If we're not on the taskqueue, wait until the uppers list
+ * is drained before proceeding with unmount. Otherwise, if
+ * we are on the taskqueue and there are still pending uppers,
+ * just re-enqueue on the end of the taskqueue.
+ */
+ if ((flags & MNT_DEFERRED) == 0) {
+ while (!TAILQ_EMPTY(&mp->mnt_uppers)) {
+ mp->mnt_kern_flag |= MNTK_TASKQUEUE_WAITER;
+ msleep(&mp->mnt_taskqueue_link, MNT_MTX(mp), 0,
+ "umntqw", 0);
+ }
+ } else if (!TAILQ_EMPTY(&mp->mnt_uppers)) {
+ MNT_IUNLOCK(mp);
+ deferred_unmount_enqueue(mp, flags, true);
+ return (0);
+ }
+ MNT_IUNLOCK(mp);
+ KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers not empty"));
+ }
+
+ /* Allow the taskqueue to safely re-enqueue on failure */
+ if ((flags & MNT_DEFERRED) != 0)
+ vfs_ref(mp);
+
if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
mnt_gen_r = mp->mnt_gen;
VI_LOCK(coveredvp);
@@ -1828,27 +2063,13 @@
}
}
- /*
- * Only privileged root, or (if MNT_USER is set) the user that did the
- * original mount is permitted to unmount this filesystem.
- */
- error = vfs_suser(mp, td);
- if (error != 0) {
- if (coveredvp != NULL) {
- VOP_UNLOCK(coveredvp);
- vdrop(coveredvp);
- }
- vfs_rel(mp);
- return (error);
- }
-
vfs_op_enter(mp);
vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
(mp->mnt_flag & MNT_UPDATE) != 0 ||
- mp->mnt_pinned_count != 0) {
+ !TAILQ_EMPTY(&mp->mnt_uppers)) {
dounmount_cleanup(mp, coveredvp, 0);
return (EBUSY);
}
@@ -1952,6 +2173,7 @@
}
return (error);
}
+
mtx_lock(&mountlist_mtx);
TAILQ_REMOVE(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
@@ -1977,6 +2199,8 @@
}
if (mp == rootdevmp)
rootdevmp = NULL;
+ if ((flags & MNT_DEFERRED) != 0)
+ vfs_rel(mp);
vfs_mount_destroy(mp);
return (0);
}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -831,9 +831,9 @@
* valid.
*/
while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
- KASSERT(mp->mnt_pinned_count == 0,
- ("%s: non-zero pinned count %d with pending unmount",
- __func__, mp->mnt_pinned_count));
+ KASSERT(TAILQ_EMPTY(&mp->mnt_uppers),
+ ("%s: non-empty upper mount list with pending unmount",
+ __func__));
if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
MNT_REL(mp);
MNT_IUNLOCK(mp);
@@ -3897,6 +3897,11 @@
{
}
+struct notify_mount {
+ struct mount mp;
+ struct mount_upper_node upper;
+};
+
/*
* Notify upper mounts about reclaimed or unlinked vnode.
*/
@@ -3907,45 +3912,52 @@
.vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
.vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
};
- struct mount *mp, *ump, *mmp;
+ struct mount *mp;
+ struct mount_upper_node *ump;
+ struct notify_mount *mmp;
mp = vp->v_mount;
if (mp == NULL)
return;
- if (TAILQ_EMPTY(&mp->mnt_uppers))
+ if (TAILQ_EMPTY(&mp->mnt_notify))
return;
- mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO);
- mmp->mnt_op = &vgonel_vfsops;
- mmp->mnt_kern_flag |= MNTK_MARKER;
+ mmp = malloc(sizeof(*mmp), M_TEMP, M_WAITOK | M_ZERO);
+ mmp->mp.mnt_op = &vgonel_vfsops;
+ mmp->mp.mnt_kern_flag |= MNTK_MARKER;
+ mmp->upper.mp = &mmp->mp;
MNT_ILOCK(mp);
- mp->mnt_kern_flag |= MNTK_VGONE_UPPER;
- for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) {
- if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) {
+ mp->mnt_upper_pending++;
+ KASSERT(mp->mnt_upper_pending > 0,
+ ("%s: mnt_upper_pending %d", __func__, mp->mnt_upper_pending));
+ for (ump = TAILQ_FIRST(&mp->mnt_notify); ump != NULL;) {
+ if ((ump->mp->mnt_kern_flag & MNTK_MARKER) != 0) {
ump = TAILQ_NEXT(ump, mnt_upper_link);
continue;
}
- TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
+ TAILQ_INSERT_AFTER(&mp->mnt_notify, ump, &mmp->upper,
+ mnt_upper_link);
MNT_IUNLOCK(mp);
switch (event) {
case VFS_NOTIFY_UPPER_RECLAIM:
- VFS_RECLAIM_LOWERVP(ump, vp);
+ VFS_RECLAIM_LOWERVP(ump->mp, vp);
break;
case VFS_NOTIFY_UPPER_UNLINK:
- VFS_UNLINK_LOWERVP(ump, vp);
+ VFS_UNLINK_LOWERVP(ump->mp, vp);
break;
default:
KASSERT(0, ("invalid event %d", event));
break;
}
MNT_ILOCK(mp);
- ump = TAILQ_NEXT(mmp, mnt_upper_link);
- TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
+ ump = TAILQ_NEXT(&mmp->upper, mnt_upper_link);
+ TAILQ_REMOVE(&mp->mnt_notify, &mmp->upper, mnt_upper_link);
}
free(mmp, M_TEMP);
- mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER;
- if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) {
- mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER;
+ mp->mnt_upper_pending--;
+ if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 &&
+ mp->mnt_upper_pending == 0) {
+ mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER;
wakeup(&mp->mnt_uppers);
}
MNT_IUNLOCK(mp);
@@ -4376,12 +4388,13 @@
MNT_KERN_FLAG(MNTK_EXTENDED_SHARED);
MNT_KERN_FLAG(MNTK_SHARED_WRITES);
MNT_KERN_FLAG(MNTK_NO_IOPF);
- MNT_KERN_FLAG(MNTK_VGONE_UPPER);
- MNT_KERN_FLAG(MNTK_VGONE_WAITER);
+ MNT_KERN_FLAG(MNTK_RECURSE);
+ MNT_KERN_FLAG(MNTK_UPPER_WAITER);
MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
MNT_KERN_FLAG(MNTK_MARKER);
MNT_KERN_FLAG(MNTK_USES_BCACHE);
MNT_KERN_FLAG(MNTK_FPLOOKUP);
+ MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER);
MNT_KERN_FLAG(MNTK_NOASYNC);
MNT_KERN_FLAG(MNTK_UNMOUNT);
MNT_KERN_FLAG(MNTK_MWAIT);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -190,6 +190,19 @@
_Static_assert(sizeof(struct mount_pcpu) == 16,
"the struct is allocated from pcpu 16 zone");
+/*
+ * Structure for tracking a stacked filesystem mounted above another
+ * filesystem. This is expected to be stored in the upper FS' per-mount data.
+ *
+ * Lock reference:
+ * i - lower mount interlock
+ * c - constant from node initialization
+ */
+struct mount_upper_node {
+ struct mount *mp; /* (c) mount object for upper FS */
+ TAILQ_ENTRY(mount_upper_node) mnt_upper_link; /* (i) position in uppers list */
+};
+
/*
* Structure per mounted filesystem. Each mounted filesystem has an
* array of operations and an instance record. The filesystems are
@@ -199,8 +212,8 @@
* l - mnt_listmtx
* m - mountlist_mtx
* i - interlock
- * i* - interlock of uppers' list head
* v - vnode freelist mutex
+ * d - deferred unmount list mutex
*
* Unmarked fields are considered stable as long as a ref is held.
*
@@ -242,10 +255,12 @@
struct mtx mnt_listmtx;
struct vnodelst mnt_lazyvnodelist; /* (l) list of lazy vnodes */
int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */
- int mnt_pinned_count; /* (i) unmount prevented */
+ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */
struct lock mnt_explock; /* vfs_export walkers lock */
- TAILQ_ENTRY(mount) mnt_upper_link; /* (i*) we in the all uppers */
- TAILQ_HEAD(, mount) mnt_uppers; /* (i) upper mounts over us */
+ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */
+ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */
+ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */
+ uint64_t mnt_taskqueue_flags; /* (d) unmount flags passed from taskqueue */
};
#endif /* _WANT_MOUNT || _KERNEL */
@@ -438,9 +453,13 @@
#define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */
#define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */
#define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */
-#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
+#define MNT_RECURSE 0x0000100000000000ULL /* recursively unmount uppers */
+#define MNT_DEFERRED 0x0000200000000000ULL /* unmount in async context */
+#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \
- MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR)
+ MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR | \
+ MNT_RECURSE | MNT_DEFERRED)
+
/*
* Internal filesystem control flags stored in mnt_kern_flag.
*
@@ -466,8 +485,8 @@
#define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads
and writes. Filesystem shall properly
handle i/o state on EFAULT. */
-#define MNTK_VGONE_UPPER 0x00000200
-#define MNTK_VGONE_WAITER 0x00000400
+#define MNTK_RECURSE 0x00000200 /* pending recursive unmount */
+#define MNTK_UPPER_WAITER 0x00000400 /* waiting to drain mnt_upper_pending */
#define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800
#define MNTK_MARKER 0x00001000
#define MNTK_UNMAPPED_BUFS 0x00002000
@@ -477,8 +496,9 @@
#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */
#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */
#define MNTK_SUSPEND_ALL 0x00080000 /* Suspended by all-fs suspension */
-#define MNTK_NOASYNC 0x00800000 /* disable async */
-#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
+#define MNTK_TASKQUEUE_WAITER 0x00100000 /* Waiting on unmount taskqueue */
+#define MNTK_NOASYNC 0x00800000 /* disable async */
+#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */
#define MNTK_SUSPEND2 0x04000000 /* block secondary writes */
@@ -952,7 +972,7 @@
* exported vnode operations
*/
-int dounmount(struct mount *, int, struct thread *);
+int dounmount(struct mount *, uint64_t, struct thread *);
int kernel_mount(struct mntarg *ma, uint64_t flags);
int kernel_vmount(int flags, ...);
@@ -1012,8 +1032,13 @@
int vfs_suser(struct mount *, struct thread *);
void vfs_unbusy(struct mount *);
void vfs_unmountall(void);
-struct mount *vfs_pin_from_vp(struct vnode *);
-void vfs_unpin(struct mount *);
+struct mount *vfs_register_upper_from_vp(struct vnode *,
+ struct mount *ump, struct mount_upper_node *);
+void vfs_register_for_notification(struct mount *, struct mount *,
+ struct mount_upper_node *);
+void vfs_unregister_for_notification(struct mount *,
+ struct mount_upper_node *);
+void vfs_unregister_upper(struct mount *, struct mount_upper_node *);
extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
extern struct mtx_padalign mountlist_mtx;
extern struct nfs_public nfs_pub;
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -297,7 +297,7 @@
*/
mp = vfs_getvfs(&etp->fsid);
if (mp != NULL)
- dounmount(mp, MNT_FORCE, curthread);
+ dounmount(mp, MNT_FORCE | MNT_RECURSE, curthread);
free(etp, M_UFSMNT);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 28, 2:17 AM (10 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16238520
Default Alt Text
D31016.diff (24 KB)

Event Timeline