Page MenuHomeFreeBSD

D35054.diff
No OneTemporary

D35054.diff

diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -310,6 +310,30 @@
return (ENOENT);
}
+ /*
+ * Specify that the covered vnode lock should remain held while
+ * lookup() performs the cross-mount walk. This prevents a lock-order
+ * reversal between the covered vnode lock (which is also locked by
+ * unionfs_lock()) and the mountpoint's busy count. Without this,
+ * unmount will lock the covered vnode lock (directly through the
+ * covered vnode) and wait for the busy count to drain, while a
+ * concurrent lookup will increment the busy count and then lock
+ * the covered vnode lock (indirectly through unionfs_lock()).
+ *
+ * Note that we can't yet use this facility for the 'below' case
+ * in which the upper vnode is the covered vnode, because that would
+ * introduce a different LOR in which the cross-mount lookup would
+ * effectively hold the upper vnode lock before acquiring the lower
+ * vnode lock, while an unrelated lock operation would still acquire
+ * the lower vnode lock before the upper vnode lock, which is the
+ * order unionfs currently requires.
+ */
+ if (!below) {
+ vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+ mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
+ VOP_UNLOCK(mp->mnt_vnodecovered);
+ }
+
MNT_ILOCK(mp);
if ((lowermp->mnt_flag & MNT_LOCAL) != 0 &&
(uppermp->mnt_flag & MNT_LOCAL) != 0)
@@ -362,6 +386,9 @@
if (error)
return (error);
+ vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+ mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
+ VOP_UNLOCK(mp->mnt_vnodecovered);
vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link);
vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -901,6 +901,8 @@
struct componentname *cnp = &ndp->ni_cnd;
int lkflags_save;
int ni_dvp_unlocked;
+ int crosslkflags;
+ bool crosslock;
/*
* Setup: break out flag bits into variables.
@@ -1232,18 +1234,32 @@
*/
while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
(cnp->cn_flags & NOCROSSMOUNT) == 0) {
+ crosslock = (dp->v_vflag & VV_CROSSLOCK) != 0;
+ crosslkflags = compute_cn_lkflags(mp, cnp->cn_lkflags,
+ cnp->cn_flags);
+ if (__predict_false(crosslock) &&
+ (crosslkflags & LK_EXCLUSIVE) != 0 &&
+ VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
+ vn_lock(dp, LK_UPGRADE | LK_RETRY);
+ if (VN_IS_DOOMED(dp)) {
+ error = ENOENT;
+ goto bad2;
+ }
+ }
if (vfs_busy(mp, 0))
continue;
- vput(dp);
+ if (__predict_true(!crosslock))
+ vput(dp);
if (dp != ndp->ni_dvp)
vput(ndp->ni_dvp);
else
vrele(ndp->ni_dvp);
vrefact(vp_crossmp);
ndp->ni_dvp = vp_crossmp;
- error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
- cnp->cn_flags), &tdp);
+ error = VFS_ROOT(mp, crosslkflags, &tdp);
vfs_unbusy(mp);
+ if (__predict_false(crosslock))
+ vput(dp);
if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
panic("vp_crossmp exclusively locked or reclaimed");
if (error) {
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -770,17 +770,22 @@
* +->F->D->E
*
* The lookup() process for namei("/var") illustrates the process:
- * VOP_LOOKUP() obtains B while A is held
- * vfs_busy() obtains a shared lock on F while A and B are held
- * vput() releases lock on B
- * vput() releases lock on A
- * VFS_ROOT() obtains lock on D while shared lock on F is held
- * vfs_unbusy() releases shared lock on F
- * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A.
- * Attempt to lock A (instead of vp_crossmp) while D is held would
- * violate the global order, causing deadlocks.
+ * 1. VOP_LOOKUP() obtains B while A is held
+ * 2. vfs_busy() obtains a shared lock on F while A and B are held
+ * 3. vput() releases lock on B
+ * 4. vput() releases lock on A
+ * 5. VFS_ROOT() obtains lock on D while shared lock on F is held
+ * 6. vfs_unbusy() releases shared lock on F
+ * 7. vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A.
+ * Attempt to lock A (instead of vp_crossmp) while D is held would
+ * violate the global order, causing deadlocks.
*
- * dounmount() locks B while F is drained.
+ * dounmount() locks B while F is drained. Note that for stacked
+ * filesystems, D and B in the example above may be the same lock,
+ * which introdues potential lock order reversal deadlock between
+ * dounmount() and step 5 above. These filesystems may avoid the LOR
+ * by setting VV_CROSSLOCK on the covered vnode so that lock B will
+ * remain held until after step 5.
*/
int
vfs_busy(struct mount *mp, int flags)
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -276,6 +276,7 @@
#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
#define VV_READLINK 0x2000 /* fdescfs linux vnode */
#define VV_UNREF 0x4000 /* vunref, do not drop lock in inactive() */
+#define VV_CROSSLOCK 0x8000 /* vnode lock is shared w/ root mounted here */
#define VMP_LAZYLIST 0x0001 /* Vnode is on mnt's lazy list */

File Metadata

Mime Type
text/plain
Expires
Mon, Nov 18, 6:42 PM (22 h, 11 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14702915
Default Alt Text
D35054.diff (5 KB)

Event Timeline