Page MenuHomeFreeBSD

D45398.diff
No OneTemporary

D45398.diff

diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -97,15 +97,17 @@
char *un_path; /* path */
int un_pathlen; /* strlen of path */
- int un_flag; /* unionfs node flag */
-};
-/*
- * unionfs node flags
- * It needs the vnode with exclusive lock, when changing the un_flag variable.
- */
-#define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */
-#define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */
+ /*
+ * unionfs node flags
+ * Changing these flags requires the vnode to be locked exclusive.
+ */
+ #define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */
+ #define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */
+ #define UNIONFS_COPY_IN_PROGRESS 0x04 /* copy/dir shadow in progress */
+ #define UNIONFS_LOOKUP_IN_PROGRESS 0x08
+ unsigned int un_flag; /* unionfs node flag */
+};
extern struct vop_vector unionfs_vnodeops;
@@ -136,29 +138,25 @@
void unionfs_tryrem_node_status(struct unionfs_node *,
struct unionfs_node_status *);
int unionfs_check_rmdir(struct vnode *, struct ucred *, struct thread *td);
-int unionfs_copyfile(struct unionfs_node *, int, struct ucred *,
+int unionfs_copyfile(struct vnode *, int, struct ucred *,
struct thread *);
void unionfs_create_uppervattr_core(struct unionfs_mount *, struct vattr *,
struct vattr *, struct thread *);
int unionfs_create_uppervattr(struct unionfs_mount *, struct vnode *,
struct vattr *, struct ucred *, struct thread *);
-int unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *,
- struct unionfs_node *, struct componentname *, struct thread *);
+int unionfs_mkshadowdir(struct vnode *, struct vnode *,
+ struct componentname *, struct thread *);
int unionfs_mkwhiteout(struct vnode *, struct vnode *,
struct componentname *, struct thread *, char *, int);
int unionfs_relookup(struct vnode *, struct vnode **,
struct componentname *, struct componentname *, struct thread *,
char *, int, u_long);
-int unionfs_relookup_for_create(struct vnode *, struct componentname *,
- struct thread *);
-int unionfs_relookup_for_delete(struct vnode *, struct componentname *,
- struct thread *);
-int unionfs_relookup_for_rename(struct vnode *, struct componentname *,
- struct thread *);
void unionfs_forward_vop_start_pair(struct vnode *, int *,
struct vnode *, int *);
bool unionfs_forward_vop_finish_pair(struct vnode *, struct vnode *, int,
struct vnode *, struct vnode *, int);
+int unionfs_set_in_progress_flag(struct vnode *, unsigned int);
+void unionfs_clear_in_progress_flag(struct vnode *, unsigned int);
static inline void
unionfs_forward_vop_start(struct vnode *basevp, int *lkflags)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -203,19 +203,19 @@
struct unionfs_node_hashhead *hd;
struct vnode *vp;
- ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
- ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
- KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR,
- ("%s: v_type != VDIR", __func__));
- KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR,
- ("%s: v_type != VDIR", __func__));
-
vp = NULLVP;
VI_LOCK(dvp);
- if (uncp->un_uppervp != NULL)
+ if (uncp->un_uppervp != NULLVP) {
+ ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
+ KASSERT(uncp->un_uppervp->v_type == VDIR,
+ ("%s: v_type != VDIR", __func__));
vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp);
- else if (uncp->un_lowervp != NULL)
+ } else if (uncp->un_lowervp != NULLVP) {
+ ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
+ KASSERT(uncp->un_lowervp->v_type == VDIR,
+ ("%s: v_type != VDIR", __func__));
vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp);
+ }
if (vp == NULLVP) {
hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ?
uncp->un_uppervp : uncp->un_lowervp));
@@ -276,9 +276,11 @@
if (unp->un_dvp != NULLVP)
vrele(unp->un_dvp);
- if (unp->un_uppervp != NULLVP)
+ if (unp->un_uppervp != NULLVP) {
vput(unp->un_uppervp);
- if (unp->un_lowervp != NULLVP)
+ if (unp->un_lowervp != NULLVP)
+ vrele(unp->un_lowervp);
+ } else if (unp->un_lowervp != NULLVP)
vput(unp->un_lowervp);
if (unp->un_hashtbl != NULL)
hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
@@ -314,7 +316,7 @@
*vpp = NULLVP;
if (uppervp == NULLVP && lowervp == NULLVP)
- panic("%s: upper and lower is null", __func__);
+ panic("%s: upper and lower are both null", __func__);
vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
@@ -327,7 +329,9 @@
vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp);
if (vp != NULLVP) {
*vpp = vp;
- goto unionfs_nodeget_out;
+ if (lkflags != 0)
+ vn_lock(*vpp, lkflags | LK_RETRY);
+ return (0);
}
}
@@ -385,27 +389,47 @@
KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
("%s: NULL dvp for non-root vp %p", __func__, vp));
- vn_lock_pair(lowervp, false, LK_EXCLUSIVE, uppervp, false,
- LK_EXCLUSIVE);
+
+ /*
+ * NOTE: There is still a possibility for cross-filesystem locking here.
+ * If dvp has an upper FS component and is locked, while the new vnode
+ * created here only has a lower-layer FS component, then we will end
+ * up taking a lower-FS lock while holding an upper-FS lock.
+ * That situation could be dealt with here using vn_lock_pair().
+ * However, that would only address one instance out of many in which
+ * a child vnode lock is taken while holding a lock on its parent
+ * directory. This is done in many places in common VFS code, as well as
+ * a few places within unionfs (which could lead to the same cross-FS
+ * locking issue if, for example, the upper FS is another nested unionfs
+ * instance). Additionally, it is unclear under what circumstances this
+ * specific lock sequence (a directory on one FS followed by a child of
+ * its 'peer' directory on another FS) would present the practical
+ * possibility of deadlock due to some other agent on the system
+ * attempting to lock those two specific vnodes in the opposite order.
+ */
+ if (uppervp != NULLVP)
+ vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY);
+ else
+ vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
error = insmntque1(vp, mp);
if (error != 0) {
unionfs_nodeget_cleanup(vp, unp);
return (error);
}
- if (lowervp != NULL && VN_IS_DOOMED(lowervp)) {
- vput(lowervp);
- unp->un_lowervp = lowervp = NULL;
- }
- if (uppervp != NULL && VN_IS_DOOMED(uppervp)) {
- vput(uppervp);
- unp->un_uppervp = uppervp = NULL;
- if (lowervp != NULLVP)
- vp->v_vnlock = lowervp->v_vnlock;
- }
- if (lowervp == NULL && uppervp == NULL) {
- unionfs_nodeget_cleanup(vp, unp);
- return (ENOENT);
- }
+ /*
+ * lowervp and uppervp should only be doomed by a forced unmount of
+ * their respective filesystems, but that can only happen if the
+ * unionfs instance is first unmounted. We also effectively hold the
+ * lock on the new unionfs vnode at this point. Therefore, if a
+ * unionfs umount has not yet reached the point at which the above
+ * insmntque1() would fail, then its vflush() call will end up
+ * blocked on our vnode lock, effectively also preventing unmount
+ * of the underlying filesystems.
+ */
+ VNASSERT(lowervp == NULLVP || !VN_IS_DOOMED(lowervp), vp,
+ ("%s: doomed lowervp %p", __func__, lowervp));
+ VNASSERT(uppervp == NULLVP || !VN_IS_DOOMED(uppervp), vp,
+ ("%s: doomed uppervp %p", __func__, uppervp));
vn_set_state(vp, VSTATE_CONSTRUCTED);
@@ -413,18 +437,16 @@
*vpp = unionfs_ins_cached_vnode(unp, dvp);
if (*vpp != NULLVP) {
unionfs_nodeget_cleanup(vp, unp);
- vp = *vpp;
- } else {
- if (uppervp != NULL)
- VOP_UNLOCK(uppervp);
- if (lowervp != NULL)
- VOP_UNLOCK(lowervp);
+ if (lkflags != 0)
+ vn_lock(*vpp, lkflags | LK_RETRY);
+ return (0);
+ } else
*vpp = vp;
- }
-unionfs_nodeget_out:
- if (lkflags & LK_TYPE_MASK)
- vn_lock(vp, lkflags | LK_RETRY);
+ if ((lkflags & LK_SHARED) != 0)
+ vn_lock(vp, LK_DOWNGRADE);
+ else if ((lkflags & LK_EXCLUSIVE) == 0)
+ VOP_UNLOCK(vp);
return (0);
}
@@ -443,6 +465,7 @@
struct vnode *dvp;
int count;
int writerefs;
+ bool unlock_lvp;
/*
* The root vnode lock may be recursed during unmount, because
@@ -455,18 +478,36 @@
*/
KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0,
("%s: vnode %p locked recursively", __func__, vp));
+
+ unp = VTOUNIONFS(vp);
+ VNASSERT(unp != NULL, vp, ("%s: already reclaimed", __func__));
+ lvp = unp->un_lowervp;
+ uvp = unp->un_uppervp;
+ dvp = unp->un_dvp;
+ unlock_lvp = (uvp == NULLVP);
+
+ /*
+ * Lock the lower vnode in addition to the upper vnode lock in order
+ * to synchronize against any unionfs_lock() operation which may still
+ * hold the lower vnode lock. We do not need to do this for the root
+ * vnode, as the root vnode should always have both upper and lower
+ * base vnodes for its entire lifecycle, so unionfs_lock() should
+ * never attempt to lock its lower vnode in the first place.
+ * Moreover, during unmount of a non-"below" unionfs mount, the lower
+ * root vnode will already be locked as it is the covered vnode.
+ */
+ if (uvp != NULLVP && lvp != NULLVP && (vp->v_vflag & VV_ROOT) == 0) {
+ vn_lock_pair(uvp, true, LK_EXCLUSIVE, lvp, false, LK_EXCLUSIVE);
+ unlock_lvp = true;
+ }
+
if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
panic("%s: failed to acquire lock for vnode lock", __func__);
-
/*
* Use the interlock to protect the clearing of v_data to
* prevent faults in unionfs_lock().
*/
VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
- dvp = unp->un_dvp;
unp->un_lowervp = unp->un_uppervp = NULLVP;
vp->v_vnlock = &(vp->v_lock);
vp->v_data = NULL;
@@ -502,18 +543,16 @@
("%s: write reference without upper vnode", __func__));
VOP_ADD_WRITECOUNT(uvp, -writerefs);
}
- if (lvp != NULLVP)
- VOP_UNLOCK(lvp);
if (uvp != NULLVP)
- VOP_UNLOCK(uvp);
+ vput(uvp);
+ if (unlock_lvp)
+ vput(lvp);
+ else if (lvp != NULLVP)
+ vrele(lvp);
if (dvp != NULLVP)
unionfs_rem_cached_vnode(unp, dvp);
- if (lvp != NULLVP)
- vrele(lvp);
- if (uvp != NULLVP)
- vrele(uvp);
if (unp->un_path != NULL) {
free(unp->un_path, M_UNIONFSPATH);
unp->un_path = NULL;
@@ -696,110 +735,6 @@
return (error);
}
-/*
- * relookup for CREATE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- *
- * If it called 'unionfs_copyfile' function by unionfs_link etc,
- * VOP_LOOKUP information is broken.
- * So it need relookup in order to create link etc.
- */
-int
-unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, CREATE);
- if (error)
- return (error);
-
- if (vp != NULLVP) {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
-
- error = EEXIST;
- }
-
- return (error);
-}
-
-/*
- * relookup for DELETE namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, DELETE);
- if (error)
- return (error);
-
- if (vp == NULLVP)
- error = ENOENT;
- else {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
- }
-
- return (error);
-}
-
-/*
- * relookup for RENAME namei operation.
- *
- * dvp is unionfs vnode. dvp should be locked.
- */
-int
-unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
- struct thread *td)
-{
- struct vnode *udvp;
- struct vnode *vp;
- struct componentname cn;
- int error;
-
- udvp = UNIONFSVPTOUPPERVP(dvp);
- vp = NULLVP;
-
- error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
- cnp->cn_namelen, RENAME);
- if (error)
- return (error);
-
- if (vp != NULLVP) {
- if (udvp == vp)
- vrele(vp);
- else
- vput(vp);
- }
-
- return (error);
-}
-
/*
* Update the unionfs_node.
*
@@ -836,6 +771,8 @@
vp->v_vnlock = uvp->v_vnlock;
VI_UNLOCK(vp);
+ for (count = 0; count < lockrec + 1; count++)
+ VOP_UNLOCK(lvp);
/*
* Re-cache the unionfs vnode against the upper vnode
*/
@@ -850,19 +787,88 @@
}
}
+/*
+ * Mark a unionfs operation as being in progress, sleeping if the
+ * same operation is already in progress.
+ * This is useful, for example, during copy-up operations in which
+ * we may drop the target vnode lock, but we want to avoid the
+ * possibility of a concurrent copy-up on the same vnode triggering
+ * a spurious failure.
+ */
+int
+unionfs_set_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+ struct unionfs_node *unp;
+ int error;
+
+ error = 0;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ VI_LOCK(vp);
+ unp = VTOUNIONFS(vp);
+ while (error == 0 && (unp->un_flag & flag) != 0) {
+ VOP_UNLOCK(vp);
+ error = msleep(vp, VI_MTX(vp), PCATCH | PDROP, "unioncp", 0);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ VI_LOCK(vp);
+ if (error == 0) {
+ /*
+ * If we waited on a concurrent copy-up and that
+ * copy-up was successful, return a non-fatal
+ * indication that the desired operation is already
+ * complete. If we waited on a concurrent lookup,
+ * return ERELOOKUP to indicate the VFS cache should
+ * be re-queried to avoid creating a duplicate unionfs
+ * vnode.
+ */
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL)
+ error = ENOENT;
+ else if (flag == UNIONFS_COPY_IN_PROGRESS &&
+ unp->un_uppervp != NULLVP)
+ error = EJUSTRETURN;
+ else if (flag == UNIONFS_LOOKUP_IN_PROGRESS)
+ error = ERELOOKUP;
+ }
+ }
+ if (error == 0)
+ unp->un_flag |= flag;
+ VI_UNLOCK(vp);
+
+ return (error);
+}
+
+void
+unionfs_clear_in_progress_flag(struct vnode *vp, unsigned int flag)
+{
+ struct unionfs_node *unp;
+
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
+ VI_LOCK(vp);
+ if (unp != NULL) {
+ VNASSERT((unp->un_flag & flag) != 0, vp,
+ ("%s: copy not in progress", __func__));
+ unp->un_flag &= ~flag;
+ }
+ wakeup(vp);
+ VI_UNLOCK(vp);
+}
+
/*
* Create a new shadow dir.
*
- * udvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
*
* If no error returned, unp will be updated.
*/
int
-unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
- struct unionfs_node *unp, struct componentname *cnp, struct thread *td)
+unionfs_mkshadowdir(struct vnode *dvp, struct vnode *vp,
+ struct componentname *cnp, struct thread *td)
{
struct vnode *lvp;
struct vnode *uvp;
+ struct vnode *udvp;
struct vattr va;
struct vattr lva;
struct nameidata nd;
@@ -870,10 +876,25 @@
struct ucred *cred;
struct ucred *credbk;
struct uidinfo *rootinfo;
+ struct unionfs_mount *ump;
+ struct unionfs_node *dunp;
+ struct unionfs_node *unp;
int error;
+ ASSERT_VOP_ELOCKED(dvp, __func__);
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ ump = MOUNTTOUNIONFSMOUNT(vp->v_mount);
+ unp = VTOUNIONFS(vp);
if (unp->un_uppervp != NULLVP)
return (EEXIST);
+ dunp = VTOUNIONFS(dvp);
+ udvp = dunp->un_uppervp;
+
+ error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+ if (error == EJUSTRETURN)
+ return (0);
+ else if (error != 0)
+ return (error);
lvp = unp->un_lowervp;
uvp = NULLVP;
@@ -897,11 +918,29 @@
NDPREINIT(&nd);
if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
- goto unionfs_mkshadowdir_abort;
+ goto unionfs_mkshadowdir_finish;
+ vref(udvp);
+ VOP_UNLOCK(vp);
if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td,
- cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
- goto unionfs_mkshadowdir_abort;
+ cnp->cn_nameptr, cnp->cn_namelen, CREATE))) {
+ /*
+ * When handling error cases here, we drop udvp's lock and
+ * then jump to exit code that relocks dvp, which in most
+ * cases will effectively relock udvp. However, this is
+ * not guaranteed to be the case, as various calls made
+ * here (such as unionfs_relookup() above and VOP_MKDIR()
+ * below) may unlock and then relock udvp, allowing dvp to
+ * be reclaimed in the meantime. In such a situation dvp
+ * will no longer share its lock with udvp. Since
+ * performance isn't a concern for these error cases, it
+ * makes more sense to reuse the common code that locks
+ * dvp on exit than to explicitly check for reclamation
+ * of dvp.
+ */
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
+ }
if (uvp != NULLVP) {
if (udvp == uvp)
vrele(uvp);
@@ -909,11 +948,14 @@
vput(uvp);
error = EEXIST;
- goto unionfs_mkshadowdir_abort;
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
}
- if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
- goto unionfs_mkshadowdir_abort;
+ if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) {
+ vput(udvp);
+ goto unionfs_mkshadowdir_relock;
+ }
unionfs_create_uppervattr_core(ump, &lva, &va, td);
/*
@@ -924,7 +966,7 @@
* component. This *should* be fine, as cn_namelen will still
* correctly indicate the length of only the current component,
* but ZFS in particular does not respect cn_namelen in its VOP_MKDIR
- * implementation
+ * implementation.
* Note that this assumes nd.ni_cnd.cn_pnbuf was allocated by
* something like a local namei() operation and the temporary
* NUL-termination will not have an effect on other threads.
@@ -934,27 +976,58 @@
*pathend = '\0';
error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va);
*pathend = pathterm;
-
- if (!error) {
- /*
- * XXX The bug which cannot set uid/gid was corrected.
- * Ignore errors.
- */
- va.va_type = VNON;
- VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
-
+ if (error != 0) {
/*
- * VOP_SETATTR() may transiently drop uvp's lock, so it's
- * important to call it before unionfs_node_update() transfers
- * the unionfs vnode's lock from lvp to uvp; otherwise the
- * unionfs vnode itself would be transiently unlocked and
- * potentially doomed.
+ * See the comment after unionfs_relookup() above for an
+ * explanation of why we unlock udvp here only to relock
+ * dvp on exit.
*/
- unionfs_node_update(unp, uvp, td);
+ vput(udvp);
+ vn_finished_write(mp);
+ goto unionfs_mkshadowdir_relock;
}
+
+ /*
+ * XXX The bug which cannot set uid/gid was corrected.
+ * Ignore errors.
+ */
+ va.va_type = VNON;
+ /*
+ * VOP_SETATTR() may transiently drop uvp's lock, so it's
+ * important to call it before unionfs_node_update() transfers
+ * the unionfs vnode's lock from lvp to uvp; otherwise the
+ * unionfs vnode itself would be transiently unlocked and
+ * potentially doomed.
+ */
+ VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
+
+ /*
+ * uvp may become doomed during VOP_VPUT_PAIR() if the implementation
+ * must temporarily drop uvp's lock. However, since we hold a
+ * reference to uvp from the VOP_MKDIR() call above, this would require
+ * a forcible unmount of uvp's filesystem, which in turn can only
+ * happen if our unionfs instance is first forcibly unmounted. We'll
+ * therefore catch this case in the NULL check of unp below.
+ */
+ VOP_VPUT_PAIR(udvp, &uvp, false);
vn_finished_write(mp);
+ vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL) {
+ vput(uvp);
+ error = ENOENT;
+ } else
+ unionfs_node_update(unp, uvp, td);
+ VOP_UNLOCK(vp);
+
+unionfs_mkshadowdir_relock:
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (error == 0 && (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)))
+ error = ENOENT;
-unionfs_mkshadowdir_abort:
+unionfs_mkshadowdir_finish:
+ unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
cnp->cn_cred = credbk;
chgproccnt(cred->cr_ruidinfo, -1, 0);
crfree(cred);
@@ -1116,23 +1189,31 @@
/*
* Create a new whiteout.
*
- * udvp and dvp should be locked on entry and will be locked on return.
+ * dvp and vp are unionfs vnodes representing a parent directory and
+ * child file, should be locked on entry, and will be locked on return.
*/
int
-unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
+unionfs_mkwhiteout(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp, struct thread *td, char *path, int pathlen)
{
+ struct vnode *udvp;
struct vnode *wvp;
struct nameidata nd;
struct mount *mp;
int error;
- int lkflags;
+ bool dvp_locked;
+
+ ASSERT_VOP_ELOCKED(dvp, __func__);
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ udvp = VTOUNIONFS(dvp)->un_uppervp;
wvp = NULLVP;
NDPREINIT(&nd);
+ vref(udvp);
+ VOP_UNLOCK(vp);
if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path,
pathlen, CREATE))) {
- return (error);
+ goto unionfs_mkwhiteout_cleanup;
}
if (wvp != NULLVP) {
if (udvp == wvp)
@@ -1140,18 +1221,27 @@
else
vput(wvp);
- return (EEXIST);
+ if (nd.ni_cnd.cn_flags & ISWHITEOUT)
+ error = 0;
+ else
+ error = EEXIST;
+ goto unionfs_mkwhiteout_cleanup;
}
if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
- goto unionfs_mkwhiteout_free_out;
- unionfs_forward_vop_start(udvp, &lkflags);
+ goto unionfs_mkwhiteout_cleanup;
error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE);
- unionfs_forward_vop_finish(dvp, udvp, lkflags);
-
vn_finished_write(mp);
-unionfs_mkwhiteout_free_out:
+unionfs_mkwhiteout_cleanup:
+ if (VTOUNIONFS(dvp) == NULL) {
+ vput(udvp);
+ dvp_locked = false;
+ } else {
+ vrele(udvp);
+ dvp_locked = true;
+ }
+ vn_lock_pair(dvp, dvp_locked, LK_EXCLUSIVE, vp, false, LK_EXCLUSIVE);
return (error);
}
@@ -1165,10 +1255,11 @@
*/
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
- struct unionfs_node *unp, struct vattr *uvap, struct thread *td)
+ struct vnode *vp, struct vattr *uvap, struct thread *td)
{
struct unionfs_mount *ump;
- struct vnode *vp;
+ struct unionfs_node *unp;
+ struct vnode *uvp;
struct vnode *lvp;
struct ucred *cred;
struct vattr lva;
@@ -1176,8 +1267,10 @@
int fmode;
int error;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
- vp = NULLVP;
+ uvp = NULLVP;
lvp = unp->un_lowervp;
cred = td->td_ucred;
fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
@@ -1200,42 +1293,39 @@
NDPREINIT(&nd);
vref(udvp);
- if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd, false)) != 0)
- goto unionfs_vn_create_on_upper_free_out2;
- vrele(udvp);
+ VOP_UNLOCK(vp);
+ if ((error = vfs_relookup(udvp, &uvp, &nd.ni_cnd, false)) != 0) {
+ vrele(udvp);
+ return (error);
+ }
- if (vp != NULLVP) {
- if (vp == udvp)
- vrele(vp);
+ if (uvp != NULLVP) {
+ if (uvp == udvp)
+ vrele(uvp);
else
- vput(vp);
+ vput(uvp);
error = EEXIST;
- goto unionfs_vn_create_on_upper_free_out1;
+ goto unionfs_vn_create_on_upper_cleanup;
}
- if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0)
- goto unionfs_vn_create_on_upper_free_out1;
+ if ((error = VOP_CREATE(udvp, &uvp, &nd.ni_cnd, uvap)) != 0)
+ goto unionfs_vn_create_on_upper_cleanup;
- if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
- vput(vp);
- goto unionfs_vn_create_on_upper_free_out1;
+ if ((error = VOP_OPEN(uvp, fmode, cred, td, NULL)) != 0) {
+ vput(uvp);
+ goto unionfs_vn_create_on_upper_cleanup;
}
- error = VOP_ADD_WRITECOUNT(vp, 1);
- CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
- __func__, vp, vp->v_writecount);
+ error = VOP_ADD_WRITECOUNT(uvp, 1);
+ CTR3(KTR_VFS, "%s: uvp %p v_writecount increased to %d",
+ __func__, uvp, uvp->v_writecount);
if (error == 0) {
- *vpp = vp;
+ *vpp = uvp;
} else {
- VOP_CLOSE(vp, fmode, cred, td);
+ VOP_CLOSE(uvp, fmode, cred, td);
}
-unionfs_vn_create_on_upper_free_out1:
- VOP_UNLOCK(udvp);
-
-unionfs_vn_create_on_upper_free_out2:
- KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path,
- ("%s: cn_pnbuf changed", __func__));
-
+unionfs_vn_create_on_upper_cleanup:
+ vput(udvp);
return (error);
}
@@ -1310,13 +1400,18 @@
*
* If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
* docopy.
+ *
+ * vp is a unionfs vnode that should be locked on entry and will be
+ * locked on return.
*
* If no error returned, unp will be updated.
*/
int
-unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
+unionfs_copyfile(struct vnode *vp, int docopy, struct ucred *cred,
struct thread *td)
{
+ struct unionfs_node *unp;
+ struct unionfs_node *dunp;
struct mount *mp;
struct vnode *udvp;
struct vnode *lvp;
@@ -1324,6 +1419,8 @@
struct vattr uva;
int error;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+ unp = VTOUNIONFS(vp);
lvp = unp->un_lowervp;
uvp = NULLVP;
@@ -1333,22 +1430,51 @@
return (EINVAL);
if (unp->un_uppervp != NULLVP)
return (EEXIST);
- udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
+
+ udvp = NULLVP;
+ VI_LOCK(unp->un_dvp);
+ dunp = VTOUNIONFS(unp->un_dvp);
+ if (dunp != NULL)
+ udvp = dunp->un_uppervp;
+ VI_UNLOCK(unp->un_dvp);
+
if (udvp == NULLVP)
return (EROFS);
if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
return (EROFS);
+ ASSERT_VOP_UNLOCKED(udvp, __func__);
+
+ error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
+ if (error == EJUSTRETURN)
+ return (0);
+ else if (error != 0)
+ return (error);
error = VOP_ACCESS(lvp, VREAD, cred, td);
if (error != 0)
- return (error);
+ goto unionfs_copyfile_cleanup;
if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)) != 0)
- return (error);
- error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
+ goto unionfs_copyfile_cleanup;
+ error = unionfs_vn_create_on_upper(&uvp, udvp, vp, &uva, td);
if (error != 0) {
vn_finished_write(mp);
- return (error);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ goto unionfs_copyfile_cleanup;
+ }
+
+ /*
+ * Note that it's still possible for e.g. VOP_WRITE to relock
+ * uvp below while holding vp[=lvp] locked. Replacing
+ * unionfs_copyfile_core with vn_generic_copy_file_range() will
+ * allow us to avoid the problem by moving this vn_lock_pair()
+ * call much later.
+ */
+ vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE);
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL) {
+ error = ENOENT;
+ goto unionfs_copyfile_cleanup;
}
if (docopy != 0) {
@@ -1369,18 +1495,30 @@
/* Reset the attributes. Ignore errors. */
uva.va_type = VNON;
VOP_SETATTR(uvp, &uva, cred);
+ unionfs_node_update(unp, uvp, td);
}
- unionfs_node_update(unp, uvp, td);
-
+unionfs_copyfile_cleanup:
+ unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS);
return (error);
}
/*
- * It checks whether vp can rmdir. (check empty)
+ * Determine if the unionfs view of a directory is empty such that
+ * an rmdir operation can be permitted.
+ *
+ * We assume the VOP_RMDIR() against the upper layer vnode will take
+ * care of this check for us where the upper FS is concerned, so here
+ * we concentrate on the lower FS. We need to check for the presence
+ * of files other than "." and ".." in the lower FS directory and
+ * then cross-check any files we find against the upper FS to see if
+ * a whiteout is present (in which case we treat the lower file as
+ * non-present).
*
- * vp is unionfs vnode.
- * vp should be locked.
+ * The logic here is based heavily on vn_dir_check_empty().
+ *
+ * vp should be a locked unionfs node, and vp's lowervp should also be
+ * locked.
*/
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
@@ -1388,115 +1526,127 @@
struct vnode *uvp;
struct vnode *lvp;
struct vnode *tvp;
+ char *dirbuf;
+ size_t dirbuflen, len;
+ off_t off;
struct dirent *dp;
- struct dirent *edp;
struct componentname cn;
- struct iovec iov;
- struct uio uio;
struct vattr va;
int error;
int eofflag;
- int lookuperr;
-
- /*
- * The size of buf needs to be larger than DIRBLKSIZ.
- */
- char buf[256 * 6];
-
- ASSERT_VOP_ELOCKED(vp, __func__);
eofflag = 0;
- uvp = UNIONFSVPTOUPPERVP(vp);
lvp = UNIONFSVPTOLOWERVP(vp);
+ uvp = UNIONFSVPTOUPPERVP(vp);
+
+ /*
+ * Note that the locking here still isn't ideal: We expect the caller
+ * to hold both the upper and lower layer locks as well as the upper
+ * parent directory lock, which it can do in a manner that avoids
+ * deadlock. However, if the cross-check logic below needs to call
+ * VOP_LOOKUP(), that may relock the upper vnode and lock any found
+ * child vnode in a way that doesn't protect against deadlock given
+ * the other held locks. Beyond that, the various other VOPs we issue
+ * below, such as VOP_OPEN() and VOP_READDIR(), may also re-lock the
+ * lower vnode.
+ * We might instead just handoff between the upper vnode lock
+ * (and its parent directory lock) and the lower vnode lock as needed,
+ * so that the lower lock is never held at the same time as the upper
+ * locks, but that opens up a wider window in which the upper
+ * directory (and also the lower directory if it isn't truly
+ * read-only) may change while the relevant lock is dropped. But
+ * since re-locking may happen here and open up such a window anyway,
+ * perhaps that is a worthwhile tradeoff? Or perhaps we can ultimately
+ * do sufficient tracking of empty state within the unionfs vnode
+ * (in conjunction with upcalls from the lower FSes to notify us
+ * of out-of-band state changes) that we can avoid these costly checks
+ * altogether.
+ */
+ ASSERT_VOP_LOCKED(lvp, __func__);
+ ASSERT_VOP_ELOCKED(uvp, __func__);
- /* check opaque */
if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
return (error);
if (va.va_flags & OPAQUE)
return (0);
- /* open vnode */
#ifdef MAC
- if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
+ if ((error = mac_vnode_check_open(cred, lvp, VEXEC | VREAD)) != 0)
return (error);
#endif
- if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
+ if ((error = VOP_ACCESS(lvp, VEXEC | VREAD, cred, td)) != 0)
return (error);
- if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
+ if ((error = VOP_OPEN(lvp, FREAD, cred, td, NULL)) != 0)
+ return (error);
+ if ((error = VOP_GETATTR(lvp, &va, cred)) != 0)
return (error);
- uio.uio_rw = UIO_READ;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_td = td;
- uio.uio_offset = 0;
+ dirbuflen = max(DEV_BSIZE, GENERIC_MAXDIRSIZ);
+ if (dirbuflen < va.va_blocksize)
+ dirbuflen = va.va_blocksize;
+ dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
-#ifdef MAC
- error = mac_vnode_check_readdir(td->td_ucred, lvp);
-#endif
- while (!error && !eofflag) {
- iov.iov_base = buf;
- iov.iov_len = sizeof(buf);
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_resid = iov.iov_len;
+ len = 0;
+ off = 0;
+ eofflag = 0;
- error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
+ for (;;) {
+ error = vn_dir_next_dirent(lvp, td, dirbuf, dirbuflen,
+ &dp, &len, &off, &eofflag);
if (error != 0)
break;
- KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf),
- ("%s: empty read from lower FS", __func__));
-
- edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
- for (dp = (struct dirent*)buf; !error && dp < edp;
- dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
- if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
- (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
- (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
- continue;
-
- cn.cn_namelen = dp->d_namlen;
- cn.cn_pnbuf = NULL;
- cn.cn_nameptr = dp->d_name;
- cn.cn_nameiop = LOOKUP;
- cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
- cn.cn_lkflags = LK_EXCLUSIVE;
- cn.cn_cred = cred;
-
- /*
- * check entry in lower.
- * Sometimes, readdir function returns
- * wrong entry.
- */
- lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
- if (!lookuperr)
- vput(tvp);
- else
- continue; /* skip entry */
-
- /*
- * check entry
- * If it has no exist/whiteout entry in upper,
- * directory is not empty.
- */
- cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
- lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
+ if (len == 0) {
+ /* EOF */
+ error = 0;
+ break;
+ }
- if (!lookuperr)
- vput(tvp);
+ if (dp->d_type == DT_WHT)
+ continue;
- /* ignore exist or whiteout entry */
- if (!lookuperr ||
- (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
- continue;
+ /*
+ * Any file in the directory which is not '.' or '..' indicates
+ * the directory is not empty.
+ */
+ switch (dp->d_namlen) {
+ case 2:
+ if (dp->d_name[1] != '.') {
+ /* Can't be '..' (nor '.') */
+ break;
+ }
+ /* FALLTHROUGH */
+ case 1:
+ if (dp->d_name[0] != '.') {
+ /* Can't be '..' nor '.' */
+ break;
+ }
+ continue;
+ default:
+ break;
+ }
+ cn.cn_namelen = dp->d_namlen;
+ cn.cn_pnbuf = NULL;
+ cn.cn_nameptr = dp->d_name;
+ cn.cn_nameiop = LOOKUP;
+ cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
+ cn.cn_lkflags = LK_EXCLUSIVE;
+ cn.cn_cred = cred;
+
+ error = VOP_LOOKUP(uvp, &tvp, &cn);
+ if (tvp != NULLVP)
+ vput(tvp);
+ if (error != 0 && error != ENOENT && error != EJUSTRETURN)
+ break;
+ else if ((cn.cn_flags & ISWHITEOUT) == 0) {
error = ENOTEMPTY;
- }
+ break;
+ } else
+ error = 0;
}
- /* close vnode */
- VOP_CLOSE(vp, FREAD, cred, td);
-
+ VOP_CLOSE(lvp, FREAD, cred, td);
+ free(dirbuf, M_TEMP);
return (error);
}
-
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -327,18 +327,15 @@
* unionfs_lock()) and the mountpoint's busy count. Without this,
* unmount will lock the covered vnode lock (directly through the
* covered vnode) and wait for the busy count to drain, while a
- * concurrent lookup will increment the busy count and then lock
+ * concurrent lookup will increment the busy count and then may lock
* the covered vnode lock (indirectly through unionfs_lock()).
*
- * Note that we can't yet use this facility for the 'below' case
- * in which the upper vnode is the covered vnode, because that would
- * introduce a different LOR in which the cross-mount lookup would
- * effectively hold the upper vnode lock before acquiring the lower
- * vnode lock, while an unrelated lock operation would still acquire
- * the lower vnode lock before the upper vnode lock, which is the
- * order unionfs currently requires.
+ * Note that this is only needed for the 'below' case in which the
+ * upper vnode is also the covered vnode, because unionfs_lock()
+ * only locks the upper vnode as long as both lower and upper vnodes
+ * are present (which they will always be for the unionfs mount root).
*/
- if (!below) {
+ if (below) {
vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
VOP_UNLOCK(mp->mnt_vnodecovered);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -85,12 +85,11 @@
struct componentname *cnp;
struct thread *td;
u_long nameiop;
- u_long cnflags, cnflagsbk;
- int iswhiteout;
+ u_long cnflags;
int lockflag;
- int error , uerror, lerror;
+ int lkflags;
+ int error, uerror, lerror;
- iswhiteout = 0;
lockflag = 0;
error = uerror = lerror = ENOENT;
cnp = ap->a_cnp;
@@ -119,88 +118,180 @@
LOOKUP != nameiop)
return (EROFS);
+ /*
+ * Note that a lookup is in-flight, and block if another lookup
+ * is already in-flight against dvp. This is done because we may
+ * end up dropping dvp's lock to look up a lower vnode or to create
+ * a shadow directory, opening up the possibility of parallel lookups
+ * against the same directory creating duplicate unionfs vnodes for
+ * the same file(s). Note that if this function encounters an
+ * in-progress lookup for the directory, it will block until the
+ * lookup is complete and then return ERELOOKUP to allow any
+ * existing unionfs vnode to be loaded from the VFS cache.
+ * This is really a hack; filesystems that support MNTK_LOOKUP_SHARED
+ * (which unionfs currently doesn't) seem to deal with this by using
+ * the vfs_hash_* functions to manage a per-mount vnode cache keyed
+ * by the inode number (or some roughly equivalent unique ID
+ * usually associated with the storage medium). It may make sense
+ * for unionfs to adopt something similar as a replacement for its
+ * current half-baked directory-only cache implementation, particularly
+ * if we want to support MNTK_LOOKUP_SHARED here.
+ */
+ error = unionfs_set_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
+ if (error != 0)
+ return (error);
/*
* lookup dotdot
*/
if (cnflags & ISDOTDOT) {
- if (LOOKUP != nameiop && udvp == NULLVP)
- return (EROFS);
+ if (LOOKUP != nameiop && udvp == NULLVP) {
+ error = EROFS;
+ goto unionfs_lookup_return;
+ }
- if (udvp != NULLVP) {
+ if (udvp != NULLVP)
dtmpvp = udvp;
- if (ldvp != NULLVP)
- VOP_UNLOCK(ldvp);
- }
else
dtmpvp = ldvp;
+ unionfs_forward_vop_start(dtmpvp, &lkflags);
error = VOP_LOOKUP(dtmpvp, &vp, cnp);
+ unionfs_forward_vop_finish(dvp, dtmpvp, lkflags);
- if (dtmpvp == udvp && ldvp != NULLVP) {
- VOP_UNLOCK(udvp);
- vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
- dunp = VTOUNIONFS(dvp);
- if (error == 0 && dunp == NULL)
- error = ENOENT;
- }
+ /*
+ * Drop the lock and reference on vp. If the lookup was
+ * successful, we'll either need to exchange vp's lock and
+ * reference for the unionfs parent vnode's lock and
+ * reference, or (if dvp was reclaimed) we'll need to drop
+ * vp's lock and reference to return early.
+ */
+ if (vp != NULLVP)
+ vput(vp);
+ dunp = VTOUNIONFS(dvp);
+ if (error == 0 && dunp == NULL)
+ error = ENOENT;
if (error == 0) {
- /*
- * Exchange lock and reference from vp to
- * dunp->un_dvp. vp is upper/lower vnode, but it
- * will need to return the unionfs vnode.
- */
- if (nameiop == DELETE || nameiop == RENAME ||
- (cnp->cn_lkflags & LK_TYPE_MASK))
- VOP_UNLOCK(vp);
- vrele(vp);
-
dtmpvp = dunp->un_dvp;
vref(dtmpvp);
VOP_UNLOCK(dvp);
*(ap->a_vpp) = dtmpvp;
- if (nameiop == DELETE || nameiop == RENAME)
- vn_lock(dtmpvp, LK_EXCLUSIVE | LK_RETRY);
- else if (cnp->cn_lkflags & LK_TYPE_MASK)
- vn_lock(dtmpvp, cnp->cn_lkflags |
- LK_RETRY);
+ vn_lock(dtmpvp, cnp->cn_lkflags | LK_RETRY);
+ if (VN_IS_DOOMED(dtmpvp)) {
+ vput(dtmpvp);
+ *(ap->a_vpp) = NULLVP;
+ error = ENOENT;
+ }
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
- } else if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
- cache_enter(dvp, NULLVP, cnp);
+ }
- goto unionfs_lookup_return;
+ goto unionfs_lookup_cleanup;
}
+ /*
+ * Lookup lower layer. We do this before looking up the upper
+ * layer, as we may drop the upper parent directory's lock, and we
+ * want to ensure the upper parent remains locked from the point of
+ * lookup through any ensuing VOP that may require it to be locked.
+ * The cost of this is that we may end up performing an unnecessary
+ * lower layer lookup if a whiteout is present in the upper layer.
+ */
+ if (ldvp != NULLVP && !(cnflags & DOWHITEOUT)) {
+ struct componentname lcn;
+ bool is_dot;
+
+ if (udvp != NULLVP) {
+ vref(ldvp);
+ VOP_UNLOCK(dvp);
+ vn_lock(ldvp, LK_EXCLUSIVE | LK_RETRY);
+ }
+
+ lcn = *cnp;
+ /* always op is LOOKUP */
+ lcn.cn_nameiop = LOOKUP;
+ lcn.cn_flags = cnflags;
+ is_dot = false;
+
+ if (udvp == NULLVP)
+ unionfs_forward_vop_start(ldvp, &lkflags);
+ lerror = VOP_LOOKUP(ldvp, &lvp, &lcn);
+ if (udvp == NULLVP &&
+ unionfs_forward_vop_finish(dvp, ldvp, lkflags)) {
+ if (lvp != NULLVP)
+ VOP_UNLOCK(lvp);
+ error = ENOENT;
+ goto unionfs_lookup_cleanup;
+ }
+
+ if (udvp == NULLVP)
+ cnp->cn_flags = lcn.cn_flags;
+
+ if (lerror == 0) {
+ if (ldvp == lvp) { /* is dot */
+ vrele(lvp);
+ *(ap->a_vpp) = dvp;
+ vref(dvp);
+ is_dot = true;
+ error = lerror;
+ } else if (lvp != NULLVP)
+ VOP_UNLOCK(lvp);
+ }
+
+ if (udvp != NULLVP) {
+ vput(ldvp);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ if (VN_IS_DOOMED(dvp))
+ error = ENOENT;
+ }
+ if (is_dot)
+ goto unionfs_lookup_return;
+ else if (error != 0)
+ goto unionfs_lookup_cleanup;
+ }
/*
* lookup upper layer
*/
if (udvp != NULLVP) {
+ bool iswhiteout = false;
+
+ unionfs_forward_vop_start(udvp, &lkflags);
uerror = VOP_LOOKUP(udvp, &uvp, cnp);
+ if (unionfs_forward_vop_finish(dvp, udvp, lkflags)) {
+ if (uvp != NULLVP)
+ VOP_UNLOCK(uvp);
+ error = ENOENT;
+ goto unionfs_lookup_cleanup;
+ }
if (uerror == 0) {
if (udvp == uvp) { /* is dot */
+ if (lvp != NULLVP)
+ vrele(lvp);
vrele(uvp);
*(ap->a_vpp) = dvp;
vref(dvp);
error = uerror;
goto unionfs_lookup_return;
- }
- if (nameiop == DELETE || nameiop == RENAME ||
- (cnp->cn_lkflags & LK_TYPE_MASK))
+ } else if (uvp != NULLVP)
VOP_UNLOCK(uvp);
}
/* check whiteout */
- if (uerror == ENOENT || uerror == EJUSTRETURN)
- if (cnp->cn_flags & ISWHITEOUT)
- iswhiteout = 1; /* don't lookup lower */
- if (iswhiteout == 0 && ldvp != NULLVP)
- if (!VOP_GETATTR(udvp, &va, cnp->cn_cred) &&
- (va.va_flags & OPAQUE))
- iswhiteout = 1; /* don't lookup lower */
+ if ((uerror == ENOENT || uerror == EJUSTRETURN) &&
+ (cnp->cn_flags & ISWHITEOUT))
+ iswhiteout = true;
+ else if (VOP_GETATTR(udvp, &va, cnp->cn_cred) == 0 &&
+ (va.va_flags & OPAQUE))
+ iswhiteout = true;
+
+ if (iswhiteout && lvp != NULLVP) {
+ vrele(lvp);
+ lvp = NULLVP;
+ }
+
#if 0
UNIONFS_INTERNAL_DEBUG(
"unionfs_lookup: debug: whiteout=%d, path=%s\n",
@@ -208,39 +299,6 @@
#endif
}
- /*
- * lookup lower layer
- */
- if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) {
- /* always op is LOOKUP */
- cnp->cn_nameiop = LOOKUP;
- cnflagsbk = cnp->cn_flags;
- cnp->cn_flags = cnflags;
-
- lerror = VOP_LOOKUP(ldvp, &lvp, cnp);
-
- cnp->cn_nameiop = nameiop;
- if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN))
- cnp->cn_flags = cnflagsbk;
-
- if (lerror == 0) {
- if (ldvp == lvp) { /* is dot */
- if (uvp != NULLVP)
- vrele(uvp); /* no need? */
- vrele(lvp);
- *(ap->a_vpp) = dvp;
- vref(dvp);
-
- UNIONFS_INTERNAL_DEBUG(
- "unionfs_lookup: leave (%d)\n", lerror);
-
- return (lerror);
- }
- if (cnp->cn_lkflags & LK_TYPE_MASK)
- VOP_UNLOCK(lvp);
- }
- }
-
/*
* check lookup result
*/
@@ -280,8 +338,7 @@
if (unp == NULL)
error = ENOENT;
else
- error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
- udvp, unp, cnp, td);
+ error = unionfs_mkshadowdir(dvp, vp, cnp, td);
if (lockflag != 0)
VOP_UNLOCK(vp);
if (error != 0) {
@@ -293,6 +350,10 @@
vrele(vp);
goto unionfs_lookup_cleanup;
}
+ /*
+ * TODO: Since unionfs_mkshadowdir() relocks udvp after
+ * creating the new directory, return ERELOOKUP here?
+ */
if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED)
vn_lock(vp, LK_SHARED | LK_RETRY);
}
@@ -313,9 +374,12 @@
"unionfs_lookup: Unable to create unionfs vnode.");
goto unionfs_lookup_cleanup;
}
- if ((nameiop == DELETE || nameiop == RENAME) &&
- (cnp->cn_lkflags & LK_TYPE_MASK) == 0)
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ }
+
+ if (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)) {
+ error = ENOENT;
+ vput(vp);
+ goto unionfs_lookup_cleanup;
}
*(ap->a_vpp) = vp;
@@ -329,10 +393,12 @@
if (lvp != NULLVP)
vrele(lvp);
- if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
+ if (error == ENOENT && (cnflags & MAKEENTRY) != 0 &&
+ !VN_IS_DOOMED(dvp))
cache_enter(dvp, NULLVP, cnp);
unionfs_lookup_return:
+ unionfs_clear_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS);
UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
@@ -492,6 +558,61 @@
vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
}
+/*
+ * Exchange the default (upper vnode) lock on a unionfs vnode for the lower
+ * vnode lock, in support of operations that require access to the lower vnode
+ * even when an upper vnode is present. We don't use vn_lock_pair() to hold
+ * both vnodes at the same time, primarily because the caller may proceed
+ * to issue VOPs to the lower layer which re-lock or perform other operations
+ * which may not be safe in the presence of a locked vnode from another FS.
+ * Moreover, vn_lock_pair()'s deadlock resolution approach can introduce
+ * additional overhead that isn't necessary on these paths.
+ *
+ * vp must be a locked unionfs vnode; the lock state of this vnode is
+ * returned through *lkflags for later use in unionfs_unlock_lvp().
+ *
+ * Returns the locked lower vnode, or NULL if the lower vnode (and therefore
+ * also the unionfs vnode above it) has been doomed.
+ */
+static struct vnode *
+unionfs_lock_lvp(struct vnode *vp, int *lkflags)
+{
+ struct unionfs_node *unp;
+ struct vnode *lvp;
+
+ unp = VTOUNIONFS(vp);
+ lvp = unp->un_lowervp;
+ ASSERT_VOP_LOCKED(vp, __func__);
+ ASSERT_VOP_UNLOCKED(lvp, __func__);
+ *lkflags = VOP_ISLOCKED(vp);
+ vref(lvp);
+ VOP_UNLOCK(vp);
+ vn_lock(lvp, *lkflags | LK_RETRY);
+ if (VN_IS_DOOMED(lvp)) {
+ vput(lvp);
+ lvp = NULLVP;
+ vn_lock(vp, *lkflags | LK_RETRY);
+ }
+ return (lvp);
+}
+
+/*
+ * Undo a previous call to unionfs_lock_lvp(), restoring the default lock
+ * on the unionfs vnode. This function reloads and returns the vnode
+ * private data for the unionfs vnode, which will be NULL if the unionfs
+ * vnode became doomed while its lock was dropped. The caller must check
+ * for this case.
+ */
+static struct unionfs_node *
+unionfs_unlock_lvp(struct vnode *vp, struct vnode *lvp, int lkflags)
+{
+ ASSERT_VOP_LOCKED(lvp, __func__);
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+ vput(lvp);
+ vn_lock(vp, lkflags | LK_RETRY);
+ return (VTOUNIONFS(vp));
+}
+
static int
unionfs_open(struct vop_open_args *ap)
{
@@ -504,7 +625,9 @@
struct ucred *cred;
struct thread *td;
int error;
+ int lkflags;
enum unionfs_lkupgrade lkstatus;
+ bool lock_lvp, open_lvp;
UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n");
@@ -515,6 +638,7 @@
targetvp = NULLVP;
cred = ap->a_cred;
td = ap->a_td;
+ open_lvp = lock_lvp = false;
/*
* The executable loader path may call this function with vp locked
@@ -546,10 +670,12 @@
if (targetvp == NULLVP) {
if (uvp == NULLVP) {
if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) {
- error = unionfs_copyfile(unp,
+ error = unionfs_copyfile(vp,
!(ap->a_mode & O_TRUNC), cred, td);
- if (error != 0)
+ if (error != 0) {
+ unp = VTOUNIONFS(vp);
goto unionfs_open_abort;
+ }
targetvp = uvp = unp->un_uppervp;
} else
targetvp = lvp;
@@ -557,30 +683,69 @@
targetvp = uvp;
}
+ if (targetvp == uvp && uvp->v_type == VDIR && lvp != NULLVP &&
+ unsp->uns_lower_opencnt <= 0)
+ open_lvp = true;
+ else if (targetvp == lvp && uvp != NULLVP)
+ lock_lvp = true;
+
+ if (lock_lvp) {
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_open_abort;
+ }
+ } else
+ unionfs_forward_vop_start(targetvp, &lkflags);
+
error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp);
- if (error == 0) {
- if (targetvp == uvp) {
- if (uvp->v_type == VDIR && lvp != NULLVP &&
- unsp->uns_lower_opencnt <= 0) {
- /* open lower for readdir */
- error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
- if (error != 0) {
- VOP_CLOSE(uvp, ap->a_mode, cred, td);
- goto unionfs_open_abort;
- }
- unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
- unsp->uns_lower_opencnt++;
+
+ if (lock_lvp) {
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+ } else if (unionfs_forward_vop_finish(vp, targetvp, lkflags))
+ error = error ? error : ENOENT;
+
+ if (error != 0)
+ goto unionfs_open_abort;
+
+ if (targetvp == uvp) {
+ if (open_lvp) {
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_open_abort;
}
- unsp->uns_upper_opencnt++;
- } else {
+ /* open lower for readdir */
+ error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL) {
+ error = error ? error : ENOENT;
+ goto unionfs_open_abort;
+ }
+ if (error != 0) {
+ unionfs_forward_vop_start(uvp, &lkflags);
+ VOP_CLOSE(uvp, ap->a_mode, cred, td);
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+ unp = NULL;
+ goto unionfs_open_abort;
+ }
+ unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
unsp->uns_lower_opencnt++;
- unsp->uns_lower_openmode = ap->a_mode;
}
- vp->v_object = targetvp->v_object;
+ unsp->uns_upper_opencnt++;
+ } else {
+ unsp->uns_lower_opencnt++;
+ unsp->uns_lower_openmode = ap->a_mode;
}
+ vp->v_object = targetvp->v_object;
unionfs_open_abort:
- if (error != 0)
+
+ if (error != 0 && unp != NULL)
unionfs_tryrem_node_status(unp, unsp);
unionfs_open_cleanup:
@@ -599,9 +764,13 @@
struct ucred *cred;
struct thread *td;
struct vnode *vp;
+ struct vnode *uvp;
+ struct vnode *lvp;
struct vnode *ovp;
int error;
+ int lkflags;
enum unionfs_lkupgrade lkstatus;
+ bool lock_lvp;
UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
@@ -611,6 +780,7 @@
cred = ap->a_cred;
td = ap->a_td;
error = 0;
+ lock_lvp = false;
/*
* If the vnode is reclaimed while upgrading, we can't safely use unp
@@ -621,44 +791,72 @@
goto unionfs_close_cleanup;
unp = VTOUNIONFS(vp);
+ lvp = unp->un_lowervp;
+ uvp = unp->un_uppervp;
unionfs_get_node_status(unp, td, &unsp);
if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) {
-#ifdef DIAGNOSTIC
- printf("unionfs_close: warning: open count is 0\n");
-#endif
- if (unp->un_uppervp != NULLVP)
- ovp = unp->un_uppervp;
+ if (uvp != NULLVP)
+ ovp = uvp;
else
- ovp = unp->un_lowervp;
+ ovp = lvp;
} else if (unsp->uns_upper_opencnt > 0)
- ovp = unp->un_uppervp;
+ ovp = uvp;
else
- ovp = unp->un_lowervp;
+ ovp = lvp;
+
+ if (ovp == lvp && uvp != NULLVP) {
+ lock_lvp = true;
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
+ } else
+ unionfs_forward_vop_start(ovp, &lkflags);
error = VOP_CLOSE(ovp, ap->a_fflag, cred, td);
+ if (lock_lvp) {
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+ } else if (unionfs_forward_vop_finish(vp, ovp, lkflags))
+ error = error ? error : ENOENT;
+
if (error != 0)
goto unionfs_close_abort;
vp->v_object = ovp->v_object;
- if (ovp == unp->un_uppervp) {
- unsp->uns_upper_opencnt--;
- if (unsp->uns_upper_opencnt == 0) {
+ if (ovp == uvp) {
+ if (unsp != NULL && ((--unsp->uns_upper_opencnt) == 0)) {
if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) {
- VOP_CLOSE(unp->un_lowervp, FREAD, cred, td);
+ unp = NULL;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULLVP) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
+ VOP_CLOSE(lvp, FREAD, cred, td);
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL) {
+ error = ENOENT;
+ goto unionfs_close_abort;
+ }
unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR;
unsp->uns_lower_opencnt--;
}
if (unsp->uns_lower_opencnt > 0)
- vp->v_object = unp->un_lowervp->v_object;
+ vp->v_object = lvp->v_object;
}
- } else
+ } else if (unsp != NULL)
unsp->uns_lower_opencnt--;
unionfs_close_abort:
- unionfs_tryrem_node_status(unp, unsp);
+ if (unp != NULL && unsp != NULL)
+ unionfs_tryrem_node_status(unp, unsp);
unionfs_close_cleanup:
unionfs_downgrade_lock(vp, lkstatus);
@@ -883,7 +1081,7 @@
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- error = unionfs_copyfile(unp, (vap->va_size != 0),
+ error = unionfs_copyfile(ap->a_vp, (vap->va_size != 0),
ap->a_cred, td);
if (error != 0)
return (error);
@@ -1078,8 +1276,10 @@
error = VOP_REMOVE(udvp, uvp, cnp);
unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
ap->a_vp, uvp, uvp_lkflags);
- } else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen);
+ } else if (lvp != NULLVP) {
+ error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
+ path, pathlen);
+ }
UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
@@ -1096,7 +1296,6 @@
struct componentname *cnp;
struct thread *td;
int error;
- int needrelookup;
UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n");
@@ -1104,7 +1303,6 @@
KASSERT_UNIONFS_VNODE(ap->a_vp);
error = 0;
- needrelookup = 0;
dunp = VTOUNIONFS(ap->a_tdvp);
unp = NULL;
udvp = dunp->un_uppervp;
@@ -1121,16 +1319,15 @@
if (ap->a_vp->v_type != VREG)
return (EOPNOTSUPP);
- error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
- if (error != 0)
- return (error);
- needrelookup = 1;
+ VOP_UNLOCK(ap->a_tdvp);
+ error = unionfs_copyfile(ap->a_vp, 1, cnp->cn_cred, td);
+ vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY);
+ if (error == 0)
+ error = ERELOOKUP;
+ return (error);
}
uvp = unp->un_uppervp;
- if (needrelookup != 0)
- error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
-
if (error == 0) {
int udvp_lkflags, uvp_lkflags;
unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
@@ -1154,8 +1351,6 @@
struct vnode *tdvp;
struct vnode *tvp;
struct componentname *tcnp;
- struct vnode *ltdvp;
- struct vnode *ltvp;
struct thread *td;
/* rename target vnodes */
@@ -1164,7 +1359,6 @@
struct vnode *rtdvp;
struct vnode *rtvp;
- struct unionfs_mount *ump;
struct unionfs_node *unp;
int error;
@@ -1177,8 +1371,6 @@
tdvp = ap->a_tdvp;
tvp = ap->a_tvp;
tcnp = ap->a_tcnp;
- ltdvp = NULLVP;
- ltvp = NULLVP;
td = curthread;
rfdvp = fdvp;
rfvp = fvp;
@@ -1238,7 +1430,6 @@
UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n",
fvp, unp->un_uppervp, unp->un_lowervp);
#endif
- ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
/*
* If we only have a lower vnode, copy the source file to the upper
* FS so that the rename operation can be issued against the upper FS.
@@ -1282,10 +1473,10 @@
else if (unp->un_uppervp == NULLVP) {
switch (fvp->v_type) {
case VREG:
- error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
+ error = unionfs_copyfile(fvp, 1, fcnp->cn_cred, td);
break;
case VDIR:
- error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
+ error = unionfs_mkshadowdir(fdvp, fvp, fcnp, td);
break;
default:
error = ENODEV;
@@ -1327,7 +1518,6 @@
goto unionfs_rename_abort;
}
rtdvp = unp->un_uppervp;
- ltdvp = unp->un_lowervp;
vref(rtdvp);
if (tvp != NULLVP) {
@@ -1348,7 +1538,6 @@
goto unionfs_rename_abort;
}
rtvp = unp->un_uppervp;
- ltvp = unp->un_lowervp;
vref(rtvp);
}
}
@@ -1365,12 +1554,8 @@
cache_purge(fdvp);
}
- if (ltdvp != NULLVP)
- VOP_UNLOCK(ltdvp);
if (tdvp != rtdvp)
vrele(tdvp);
- if (ltvp != NULLVP)
- VOP_UNLOCK(ltvp);
if (tvp != rtvp && tvp != NULLVP) {
if (rtvp == NULLVP)
vput(tvp);
@@ -1504,43 +1689,55 @@
if (uvp != NULLVP) {
if (lvp != NULLVP) {
+ /*
+ * We need to keep dvp and vp's upper vnodes locked
+ * going into the VOP_RMDIR() call, but the empty
+ * directory check also requires the lower vnode lock.
+ * For this third, cross-filesystem lock we use a
+ * similar approach taken by various FS' VOP_RENAME
+ * implementations (which require 2-4 vnode locks).
+ * First we attempt a NOWAIT acquisition, then if
+ * that fails we drops the other two vnode locks,
+ * acquire lvp's lock in the normal fashion to reduce
+ * the likelihood of spinning on it in the future,
+ * then drop, reacquire the other locks, and return
+ * ERELOOKUP to re-drive the lookup in case the dvp->
+ * vp relationship has changed.
+ */
+ if (vn_lock(lvp, LK_SHARED | LK_NOWAIT) != 0) {
+ VOP_UNLOCK(ap->a_vp);
+ VOP_UNLOCK(ap->a_dvp);
+ vn_lock(lvp, LK_SHARED | LK_RETRY);
+ VOP_UNLOCK(lvp);
+ vn_lock(ap->a_dvp, LK_EXCLUSIVE | LK_RETRY);
+ vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
+ return (ERELOOKUP);
+ }
error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td);
+ /*
+ * It's possible for a direct operation on the lower FS
+ * to make the lower directory non-empty after we drop
+ * the lock, but it's also possible for the upper-layer
+ * VOP_RMDIR to relock udvp/uvp which would lead to
+ * LOR if we kept lvp locked across that call.
+ */
+ VOP_UNLOCK(lvp);
if (error != 0)
return (error);
}
ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
cnp->cn_flags |= DOWHITEOUT;
- /*
- * The relookup path will need to relock the parent dvp and
- * possibly the vp as well. Locking is expected to be done
- * in parent->child order; drop the lock on vp to avoid LOR
- * and potential recursion on vp's lock.
- * vp is expected to remain referenced during VOP_RMDIR(),
- * so vref/vrele should not be necessary here.
- */
- VOP_UNLOCK(ap->a_vp);
- VNPASS(vrefcnt(ap->a_vp) > 0, ap->a_vp);
- error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td);
- vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
- /*
- * VOP_RMDIR is dispatched against udvp, so if uvp became
- * doomed while the lock was dropped above the target
- * filesystem may not be able to cope.
- */
- if (error == 0 && VN_IS_DOOMED(uvp))
- error = ENOENT;
- if (error == 0) {
- int udvp_lkflags, uvp_lkflags;
- unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
- uvp, &uvp_lkflags);
- error = VOP_RMDIR(udvp, uvp, cnp);
- unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
- ap->a_vp, uvp, uvp_lkflags);
- }
- } else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td,
+ int udvp_lkflags, uvp_lkflags;
+ unionfs_forward_vop_start_pair(udvp, &udvp_lkflags,
+ uvp, &uvp_lkflags);
+ error = VOP_RMDIR(udvp, uvp, cnp);
+ unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags,
+ ap->a_vp, uvp, uvp_lkflags);
+ } else if (lvp != NULLVP) {
+ error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td,
unp->un_path, unp->un_pathlen);
+ }
if (error == 0) {
cache_purge(ap->a_dvp);
@@ -1613,6 +1810,7 @@
uint64_t *cookies_bk;
int error;
int eofflag;
+ int lkflags;
int ncookies_bk;
int uio_offset_bk;
enum unionfs_lkupgrade lkstatus;
@@ -1668,18 +1866,26 @@
/* upper only */
if (uvp != NULLVP && lvp == NULLVP) {
+ unionfs_forward_vop_start(uvp, &lkflags);
error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
- unsp->uns_readdir_status = 0;
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags))
+ error = error ? error : ENOENT;
+ else
+ unsp->uns_readdir_status = 0;
goto unionfs_readdir_exit;
}
/* lower only */
if (uvp == NULLVP && lvp != NULLVP) {
+ unionfs_forward_vop_start(lvp, &lkflags);
error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
- unsp->uns_readdir_status = 2;
+ if (unionfs_forward_vop_finish(vp, lvp, lkflags))
+ error = error ? error : ENOENT;
+ else
+ unsp->uns_readdir_status = 2;
goto unionfs_readdir_exit;
}
@@ -1689,14 +1895,17 @@
*/
KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp"));
KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp"));
+
if (uio->uio_offset == 0)
unsp->uns_readdir_status = 0;
if (unsp->uns_readdir_status == 0) {
/* read upper */
+ unionfs_forward_vop_start(uvp, &lkflags);
error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag,
ap->a_ncookies, ap->a_cookies);
-
+ if (unionfs_forward_vop_finish(vp, uvp, lkflags) && error == 0)
+ error = ENOENT;
if (error != 0 || eofflag == 0)
goto unionfs_readdir_exit;
unsp->uns_readdir_status = 1;
@@ -1735,14 +1944,22 @@
uio->uio_offset = 0;
}
- if (lvp == NULLVP) {
- error = EBADF;
+ lvp = unionfs_lock_lvp(vp, &lkflags);
+ if (lvp == NULL) {
+ error = ENOENT;
goto unionfs_readdir_exit;
}
+
/* read lower */
error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
ap->a_ncookies, ap->a_cookies);
+
+ unp = unionfs_unlock_lvp(vp, lvp, lkflags);
+ if (unp == NULL && error == 0)
+ error = ENOENT;
+
+
/*
* We can't return an uio_offset of 0: this would trigger an
* infinite loop, because the next call to unionfs_readdir would
@@ -1906,97 +2123,50 @@
return (0);
}
-static int
-unionfs_get_llt_revlock(struct vnode *vp, int flags)
-{
- int revlock;
-
- revlock = 0;
-
- switch (flags & LK_TYPE_MASK) {
- case LK_SHARED:
- if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
- revlock = LK_UPGRADE;
- else
- revlock = LK_RELEASE;
- break;
- case LK_EXCLUSIVE:
- case LK_UPGRADE:
- revlock = LK_RELEASE;
- break;
- case LK_DOWNGRADE:
- revlock = LK_UPGRADE;
- break;
- default:
- break;
- }
-
- return (revlock);
-}
-
-/*
- * The state of an acquired lock is adjusted similarly to
- * the time of error generating.
- * flags: LK_RELEASE or LK_UPGRADE
- */
-static void
-unionfs_revlock(struct vnode *vp, int flags)
-{
- if (flags & LK_RELEASE)
- VOP_UNLOCK_FLAGS(vp, flags);
- else {
- /* UPGRADE */
- if (vn_lock(vp, flags) != 0)
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- }
-}
-
static int
unionfs_lock(struct vop_lock1_args *ap)
{
struct unionfs_node *unp;
struct vnode *vp;
- struct vnode *uvp;
- struct vnode *lvp;
+ struct vnode *tvp;
int error;
int flags;
- int revlock;
- int interlock;
- int uhold;
+ bool lvp_locked;
- /*
- * TODO: rework the unionfs locking scheme.
- * It's not guaranteed to be safe to blindly lock two vnodes on
- * different mounts as is done here. Further, the entanglement
- * of locking both vnodes with the various options that can be
- * passed to VOP_LOCK() makes this code hard to reason about.
- * Instead, consider locking only the upper vnode, or the lower
- * vnode is the upper is not present, and taking separate measures
- * to lock both vnodes in the few cases when that is needed.
- */
error = 0;
- interlock = 1;
- uhold = 0;
flags = ap->a_flags;
vp = ap->a_vp;
if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
return (VOP_UNLOCK_FLAGS(vp, flags | LK_RELEASE));
+unionfs_lock_restart:
+ /*
+ * We currently need the interlock here to ensure we can safely
+ * access the unionfs vnode's private data. We may be able to
+ * eliminate this extra locking by instead using vfs_smr_enter()
+ * and vn_load_v_data_smr() here in conjunction with an SMR UMA
+ * zone for unionfs nodes.
+ */
if ((flags & LK_INTERLOCK) == 0)
VI_LOCK(vp);
+ else
+ flags &= ~LK_INTERLOCK;
unp = VTOUNIONFS(vp);
- if (unp == NULL)
- goto unionfs_lock_null_vnode;
-
- KASSERT_UNIONFS_VNODE(ap->a_vp);
-
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
+ if (unp == NULL) {
+ VI_UNLOCK(vp);
+ ap->a_flags = flags;
+ return (vop_stdlock(ap));
+ }
- if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0)
- panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
+ if (unp->un_uppervp != NULL) {
+ tvp = unp->un_uppervp;
+ lvp_locked = false;
+ } else {
+ tvp = unp->un_lowervp;
+ lvp_locked = true;
+ }
/*
* During unmount, the root vnode lock may be taken recursively,
@@ -2009,150 +2179,77 @@
(vp->v_vflag & VV_ROOT) != 0)
flags |= LK_CANRECURSE;
- if (lvp != NULLVP) {
- if (uvp != NULLVP && flags & LK_UPGRADE) {
+ vholdnz(tvp);
+ VI_UNLOCK(vp);
+ error = VOP_LOCK(tvp, flags);
+ vdrop(tvp);
+ if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) {
+ /*
+ * After dropping the interlock above, there exists a window
+ * in which another thread may acquire the lower vnode lock
+ * and then either doom the unionfs vnode or create an upper
+ * vnode. In either case, we will effectively be holding the
+ * wrong lock, so we must drop the lower vnode lock and
+ * restart the lock operation.
+ *
+ * If unp is not already NULL, we assume that we can safely
+ * access it because we currently hold lvp's lock.
+ * unionfs_noderem() acquires lvp's lock before freeing
+ * the vnode private data, ensuring it can't be concurrently
+ * freed while we are using it here. Likewise,
+ * unionfs_node_update() acquires lvp's lock before installing
+ * an upper vnode. Without those guarantees, we would need to
+ * reacquire the vnode interlock here.
+ * Note that unionfs_noderem() doesn't acquire lvp's lock if
+ * this is the root vnode, but the root vnode should always
+ * have an upper vnode and therefore we should never use its
+ * lower vnode lock here.
+ */
+ unp = VTOUNIONFS(vp);
+ if (unp == NULL || unp->un_uppervp != NULLVP) {
+ VOP_UNLOCK(tvp);
/*
- * Share Lock is once released and a deadlock is
- * avoided.
+ * If we previously held the lock, the upgrade may
+ * have temporarily dropped the lock, in which case
+ * concurrent dooming or copy-up will necessitate
+ * acquiring a different lock. Since we never held
+ * the new lock, LK_UPGRADE must be cleared here to
+ * avoid triggering a lockmgr panic.
*/
- vholdnz(uvp);
- uhold = 1;
- VOP_UNLOCK(uvp);
- }
- VI_LOCK_FLAGS(lvp, MTX_DUPOK);
- flags |= LK_INTERLOCK;
- vholdl(lvp);
-
- VI_UNLOCK(vp);
- ap->a_flags &= ~LK_INTERLOCK;
-
- error = VOP_LOCK(lvp, flags);
-
- VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- if (unp == NULL) {
- /* vnode is released. */
- VI_UNLOCK(vp);
- if (error == 0)
- VOP_UNLOCK(lvp);
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
- goto unionfs_lock_fallback;
+ if (flags & LK_UPGRADE)
+ flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
+ VNASSERT((flags & LK_DOWNGRADE) == 0, vp,
+ ("%s: vnode doomed during downgrade", __func__));
+ goto unionfs_lock_restart;
}
}
- if (error == 0 && uvp != NULLVP) {
- if (uhold && flags & LK_UPGRADE) {
- flags &= ~LK_TYPE_MASK;
- flags |= LK_EXCLUSIVE;
- }
- VI_LOCK_FLAGS(uvp, MTX_DUPOK);
- flags |= LK_INTERLOCK;
- if (uhold == 0) {
- vholdl(uvp);
- uhold = 1;
- }
-
- VI_UNLOCK(vp);
- ap->a_flags &= ~LK_INTERLOCK;
-
- error = VOP_LOCK(uvp, flags);
-
- VI_LOCK(vp);
- unp = VTOUNIONFS(vp);
- if (unp == NULL) {
- /* vnode is released. */
- VI_UNLOCK(vp);
- if (error == 0)
- VOP_UNLOCK(uvp);
- vdrop(uvp);
- if (lvp != NULLVP) {
- VOP_UNLOCK(lvp);
- vdrop(lvp);
- }
- goto unionfs_lock_fallback;
- }
- if (error != 0 && lvp != NULLVP) {
- /* rollback */
- VI_UNLOCK(vp);
- unionfs_revlock(lvp, revlock);
- interlock = 0;
- }
- }
-
- if (interlock)
- VI_UNLOCK(vp);
- if (lvp != NULLVP)
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
-
return (error);
-
-unionfs_lock_null_vnode:
- ap->a_flags |= LK_INTERLOCK;
- return (vop_stdlock(ap));
-
-unionfs_lock_fallback:
- /*
- * If we reach this point, we've discovered the unionfs vnode
- * has been reclaimed while the upper/lower vnode locks were
- * temporarily dropped. Such temporary droppage may happen
- * during the course of an LK_UPGRADE operation itself, and in
- * that case LK_UPGRADE must be cleared as the unionfs vnode's
- * lock has been reset to point to the standard v_lock field,
- * which has not previously been held.
- */
- if (flags & LK_UPGRADE) {
- ap->a_flags &= ~LK_TYPE_MASK;
- ap->a_flags |= LK_EXCLUSIVE;
- }
- return (vop_stdlock(ap));
}
static int
unionfs_unlock(struct vop_unlock_args *ap)
{
struct vnode *vp;
- struct vnode *lvp;
- struct vnode *uvp;
+ struct vnode *tvp;
struct unionfs_node *unp;
int error;
- int uhold;
KASSERT_UNIONFS_VNODE(ap->a_vp);
- error = 0;
- uhold = 0;
vp = ap->a_vp;
unp = VTOUNIONFS(vp);
if (unp == NULL)
- goto unionfs_unlock_null_vnode;
- lvp = unp->un_lowervp;
- uvp = unp->un_uppervp;
-
- if (lvp != NULLVP) {
- vholdnz(lvp);
- error = VOP_UNLOCK(lvp);
- }
-
- if (error == 0 && uvp != NULLVP) {
- vholdnz(uvp);
- uhold = 1;
- error = VOP_UNLOCK(uvp);
- }
+ return (vop_stdunlock(ap));
- if (lvp != NULLVP)
- vdrop(lvp);
- if (uhold != 0)
- vdrop(uvp);
+ tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp);
- return error;
+ vholdnz(tvp);
+ error = VOP_UNLOCK(tvp);
+ vdrop(tvp);
-unionfs_unlock_null_vnode:
- return (vop_stdunlock(ap));
+ return (error);
}
static int
@@ -2192,7 +2289,7 @@
uvp = unp->un_uppervp;
if (uvp == NULLVP) {
- error = unionfs_copyfile(unp, 1, td->td_ucred, td);
+ error = unionfs_copyfile(ap->a_vp, 1, td->td_ucred, td);
if (error != 0)
goto unionfs_advlock_abort;
uvp = unp->un_uppervp;
@@ -2294,7 +2391,7 @@
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+ if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
return (error);
uvp = unp->un_uppervp;
}
@@ -2467,9 +2564,10 @@
if (ovp == lvp && lvp->v_type == VREG) {
VOP_CLOSEEXTATTR(lvp, 0, cred, td);
if (uvp == NULLVP &&
- (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+ (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
unionfs_setextattr_reopen:
- if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+ unp = VTOUNIONFS(ap->a_vp);
+ if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
VOP_OPENEXTATTR(lvp, cred, td)) {
#ifdef DIAGNOSTIC
panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2561,9 +2659,10 @@
if (ovp == lvp && lvp->v_type == VREG) {
VOP_CLOSEEXTATTR(lvp, 0, cred, td);
if (uvp == NULLVP &&
- (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
+ (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) {
unionfs_deleteextattr_reopen:
- if ((unp->un_flag & UNIONFS_OPENEXTL) &&
+ unp = VTOUNIONFS(ap->a_vp);
+ if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) &&
VOP_OPENEXTATTR(lvp, cred, td)) {
#ifdef DIAGNOSTIC
panic("unionfs: VOP_OPENEXTATTR failed");
@@ -2613,7 +2712,7 @@
return (EROFS);
if (uvp == NULLVP && lvp->v_type == VREG) {
- if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
+ if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0)
return (error);
uvp = unp->un_uppervp;
}
@@ -2665,7 +2764,7 @@
unionfs_vput_pair(struct vop_vput_pair_args *ap)
{
struct mount *mp;
- struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp;
+ struct vnode *dvp, *vp, **vpp, *lvp, *uvp, *tvp, *tdvp, *tempvp;
struct unionfs_node *dunp, *unp;
int error, res;
@@ -2674,11 +2773,14 @@
vp = NULLVP;
lvp = NULLVP;
uvp = NULLVP;
+ tvp = NULLVP;
unp = NULL;
dunp = VTOUNIONFS(dvp);
- udvp = dunp->un_uppervp;
- ldvp = dunp->un_lowervp;
+ if (dunp->un_uppervp != NULL)
+ tdvp = dunp->un_uppervp;
+ else
+ tdvp = dunp->un_lowervp;
/*
* Underlying vnodes should be locked because the encompassing unionfs
@@ -2686,10 +2788,7 @@
* only be on the unionfs node. Reference them now so that the vput()s
* performed by VOP_VPUT_PAIR() will have a reference to drop.
*/
- if (udvp != NULLVP)
- vref(udvp);
- if (ldvp != NULLVP)
- vref(ldvp);
+ vref(tdvp);
if (vpp != NULL)
vp = *vpp;
@@ -2699,9 +2798,10 @@
uvp = unp->un_uppervp;
lvp = unp->un_lowervp;
if (uvp != NULLVP)
- vref(uvp);
- if (lvp != NULLVP)
- vref(lvp);
+ tvp = uvp;
+ else
+ tvp = lvp;
+ vref(tvp);
/*
* If we're being asked to return a locked child vnode, then
@@ -2721,31 +2821,19 @@
}
}
- /*
- * TODO: Because unionfs_lock() locks both the lower and upper vnodes
- * (if available), we must also call VOP_VPUT_PAIR() on both the lower
- * and upper parent/child pairs. If unionfs_lock() is reworked to lock
- * only a single vnode, this code will need to change to also only
- * operate on one vnode pair.
- */
- ASSERT_VOP_LOCKED(ldvp, __func__);
- ASSERT_VOP_LOCKED(udvp, __func__);
- ASSERT_VOP_LOCKED(lvp, __func__);
- ASSERT_VOP_LOCKED(uvp, __func__);
-
- KASSERT(lvp == NULLVP || ldvp != NULLVP,
- ("%s: NULL ldvp with non-NULL lvp", __func__));
- if (ldvp != NULLVP)
- res = VOP_VPUT_PAIR(ldvp, lvp != NULLVP ? &lvp : NULL, true);
- KASSERT(uvp == NULLVP || udvp != NULLVP,
- ("%s: NULL udvp with non-NULL uvp", __func__));
- if (udvp != NULLVP)
- res = VOP_VPUT_PAIR(udvp, uvp != NULLVP ? &uvp : NULL, true);
-
- ASSERT_VOP_UNLOCKED(ldvp, __func__);
- ASSERT_VOP_UNLOCKED(udvp, __func__);
- ASSERT_VOP_UNLOCKED(lvp, __func__);
- ASSERT_VOP_UNLOCKED(uvp, __func__);
+ ASSERT_VOP_LOCKED(tdvp, __func__);
+ ASSERT_VOP_LOCKED(tvp, __func__);
+
+ if (tdvp == dunp->un_uppervp && tvp != NULLVP && tvp == lvp) {
+ vput(tvp);
+ vput(tdvp);
+ res = 0;
+ } else {
+ res = VOP_VPUT_PAIR(tdvp, tvp != NULLVP ? &tvp : NULL, true);
+ }
+
+ ASSERT_VOP_UNLOCKED(tdvp, __func__);
+ ASSERT_VOP_UNLOCKED(tvp, __func__);
/*
* VOP_VPUT_PAIR() dropped the references we added to the underlying

File Metadata

Mime Type
text/plain
Expires
Sat, Jan 25, 5:56 AM (20 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16127209
Default Alt Text
D45398.diff (72 KB)

Event Timeline