Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102675607
D40850.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
8 KB
Referenced Files
None
Subscribers
None
D40850.diff
View Options
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -102,6 +102,10 @@
/* Allocation zone for namei. */
uma_zone_t namei_zone;
+/* Forward declaration (for use in vn_lock_enforced_flags()). */
+static int enforce_lkflags(struct mount *mp, int lkflags);
+
+
/* Placeholder vnode for mp traversal. */
static struct vnode *vp_crossmp;
@@ -157,6 +161,233 @@
* gets allocated early. See nameiinit for the direct call below.
*/
+/*
+ * Busies and returns the mount point mounted on the passed vnode, if any.
+ *
+ * The vnode's lock must be held on entry and its interlock must not be (both
+ * are asserted). The lock may be released on output, as indicated by
+ * '*unlocked', which is set to true if and only if this function unlocked the
+ * vnode; the vnode is never relocked here. The caller must also have an active
+ * reference on the vnode (vref() or vget()) which is preserved across the
+ * call. On success, the busied mount point is passed through 'mp'.
+ *
+ * If the vnode is not mounted-on, EJUSTRETURN is returned and '*mp' is set to
+ * NULL. Concurrent unmounts/remounts of the covering mount are handled
+ * transparently by restarting the process (doing so is currently not really
+ * necessary for correctness but is closer to the historical behavior where the
+ * unmounts/remounts were prevented from happening in this case, and will be
+ * required (but not enough) if we ever want to implement such things as atomic
+ * mount substitutions). ENOENT is returned if the vnode was doomed while
+ * trying to determine its covering mount, and '*mp' is set to NULL. Else,
+ * '*mp' is set to the busied mount point and 0 is returned.
+ *
+ * Note that EJUSTRETURN and ENOENT can be returned either with the vnode still
+ * locked (checks done on entry) or unlocked (checks done after a failed
+ * blocking vfs_busy()); callers must consult '*unlocked' in both cases.
+ */
+int
+vn_busy_mountedhere(struct vnode *vp, bool *unlocked, struct mount **mp)
+{
+ int error;
+
+ ASSERT_VOP_LOCKED(vp, __func__);
+ ASSERT_VI_UNLOCKED(vp, __func__);
+
+ *unlocked = false;
+ *mp = NULL;
+
+ if (VN_IS_DOOMED(vp))
+ return (ENOENT);
+
+ if (__predict_true((vn_irflag_read(vp) & VIRF_MOUNTPOINT) == 0))
+ return (EJUSTRETURN);
+
+ *mp = vp->v_mountedhere;
+ MPASS(*mp != NULL);
+
+ /*
+ * Opportunistically try to busy the mount point. On success, this can
+ * avoid a superfluous unlock/relock cycle on 'vp' in some cases (in
+ * particular, the vfs_lookup() case), and always avoids a pair of
+ * vfs_ref()/vfs_rel() calls. In that case we return with 'vp' still
+ * locked and '*unlocked' false.
+ */
+ error = vfs_busy(*mp, MBF_NOWAIT);
+ if (__predict_true(error == 0))
+ return (error);
+
+ /*
+ * Make sure '*mp' survives the unlock of 'vp'. The reference taken
+ * here is dropped by the vfs_rel() that follows the blocking
+ * vfs_busy() call in the loop below, whatever the latter's result.
+ * From this point on, 'vp' stays unlocked until we return.
+ */
+ vfs_ref(*mp);
+ VOP_UNLOCK(vp);
+ *unlocked = true;
+
+ for (;;) {
+ error = vfs_busy(*mp, 0);
+ vfs_rel(*mp);
+
+ if (__predict_true(error == 0))
+ return (0);
+
+ *mp = NULL;
+ VI_LOCK(vp);
+
+ if (VN_IS_DOOMED(vp)) {
+ error = ENOENT;
+ goto unlock_exit;
+ }
+
+ if (__predict_true
+ ((vn_irflag_read(vp) & VIRF_MOUNTPOINT) == 0)) {
+ error = EJUSTRETURN;
+ goto unlock_exit;
+ }
+ /*
+ * We can't retrieve the same (conceptual) mount point as before
+ * since the vfs_busy() above returned with an error only after
+ * 'v_mountedhere' was cleared on the covered vnode (but we could
+ * well retrieve the same pointer in case the structure is
+ * recycled).
+ */
+ *mp = vp->v_mountedhere;
+ MPASS(*mp != NULL);
+
+ /*
+ * This establishes the order "covered vnode's interlock" ->
+ * "mounted-here mount point interlock". Note that this order
+ * between a vnode and a mount point is the reverse of that of
+ * "vnode's owning mount point" -> "vnode's interlock", without
+ * causing trouble since the mount point is different in both
+ * cases. This causes a spurious LOR with the initial 'devfs'
+ * being mounted at '/' and then remounted at '/dev' (see
+ * vfs_mountroot()).
+ *
+ * The reference taken here is dropped by the vfs_rel() at the
+ * top of the next loop iteration.
+ */
+ vfs_ref(*mp);
+ VI_UNLOCK(vp);
+ }
+
+ __assert_unreachable();
+
+unlock_exit:
+ VI_UNLOCK(vp);
+ return (error);
+}
+
+/*
+ * Cross a single mounted-on vnode, returning the mount's root vnode.
+ *
+ * The vnode's lock must be held, and may be unlocked on output as indicated by
+ * '*unlocked' or on success. The caller must also have an active reference on
+ * the vnode (vref() or vget()) which is preserved across the call. On success,
+ * the mount's root vnode is returned locked according to 'root_lkflags' and
+ * with an active reference.
+ *
+ * Behaves essentially for errors and outputs as vn_busy_mountedhere(), which it
+ * calls first, with '*vpp' taking the role of '*mp'. In case of success for
+ * this step, VFS_ROOT() is called and its result returned. In case of any
+ * error, including one reported by VFS_ROOT(), '*vpp' is set to NULL. Whenever
+ * VFS_ROOT() has been called (successfully or not), '*unlocked' is guaranteed
+ * to be set to true.
+ */
+int
+vn_cross_single_mount(struct vnode *vp, int root_lkflags,
+    bool *unlocked, struct vnode **vpp)
+{
+	struct mount *mp;
+	int error;
+
+	*vpp = NULL;
+
+	/* Callers are optimized for the common "not a mount point" outcome. */
+	error = vn_busy_mountedhere(vp, unlocked, &mp);
+	if (__predict_true(error != 0))
+		return (error);
+
+	/*
+	 * The mount point is now busied. Release the covered vnode's lock, if
+	 * vn_busy_mountedhere() did not already do so, before asking the
+	 * filesystem for its root vnode.
+	 */
+	if (__predict_true(!*unlocked)) {
+		VOP_UNLOCK(vp);
+		*unlocked = true;
+	}
+	error = VFS_ROOT(mp, root_lkflags, vpp);
+	vfs_unbusy(mp);
+
+	/*
+	 * Enforce the documented contract: do not let whatever a failing
+	 * VFS_ROOT() implementation may have stored leak out through '*vpp'.
+	 */
+	if (__predict_false(error != 0))
+		*vpp = NULL;
+
+	return (error);
+}
+
+/*
+ * Lock 'vp' using 'lkflags' as adjusted by enforce_lkflags() for the vnode's
+ * mount. LK_RETRY is always requested, so the lock operation is not expected
+ * to fail, which is asserted.
+ */
+static void
+vn_lock_enforced_flags(struct vnode *vp, int lkflags)
+{
+	int flags;
+	int rc __unused;
+
+	flags = enforce_lkflags(vp->v_mount, lkflags | LK_RETRY);
+	rc = vn_lock(vp, flags);
+	KASSERT(rc == 0,
+	    ("%s: vn_lock(LK_RETRY) returned %d", __func__, rc));
+}
+
+/*
+ * Repeatedly cross mounts starting from a given vnode.
+ *
+ * Traverses all successive mounts on the same path, locking the successive
+ * vnodes as specified by enforce_lkflags() and unlocking them after obtaining
+ * their covering mount. Ensures the final vnode is locked and actively
+ * referenced. The initial vnode is returned unlocked and its active reference
+ * is released except if it is also the final vnode (no mount points to cross).
+ *
+ * Mounts are crossed until reaching a vnode that is not covered by a mount,
+ * which is returned locked. If some traversed vnode happens to be doomed,
+ * ENOENT is returned. Can return errors reported by VFS_ROOT(). On success,
+ * puts the final vnode into '*vpp' and returns 0.
+ *
+ * On any error, the vnode being processed at that point is unlocked (if it
+ * still was locked) and its reference is released before returning. '*vpp' is
+ * explicitly reset to NULL only when the final vnode is found doomed after
+ * having been relocked; on the other error paths it is left as stored by
+ * vn_cross_single_mount().
+ *
+ * NOTE(review): 'lkflags' is passed unmodified to vn_cross_single_mount();
+ * in this function, enforce_lkflags() is only applied (through
+ * vn_lock_enforced_flags()) when relocking the final vnode. Confirm that this
+ * matches the "as specified by enforce_lkflags()" statement above for the
+ * root vnodes obtained while crossing.
+ *
+ * This function ensures that the crossed mountpoint cannot be busied and the
+ * initial vnode locked at the same time. The goal is to avoid establishing
+ * a lock order between them in order to avoid deadlocks, at lookup with mounted
+ * stacked filesystems (nullfs, unionfs) where locking the mountpoint's root
+ * vnode leads to locking the covered vnode as well and vice-versa, but also at
+ * unmount where parallel vfs_busy() calls block while acquiring the covered
+ * vnode's lock, which establishes the acquisition order mount point -> covered
+ * vnode. This function (through the VFS_ROOT() call) only establishes the
+ * acquisition order mount point -> root vnode, which implies mount point ->
+ * covered vnode for stacked filesystems, thus the same order as that of
+ * dounmount(). In other words, the legal order is that a mount point reference
+ * must always be acquired before the vnode's lock, be it the root vnode under
+ * the mount point or the covered vnode over it.
+ */
+int
+vn_cross_mounts(struct vnode* vp, int const lkflags, struct vnode ** const vpp)
+{
+ int error;
+ bool unlocked;
+
+ for (;;) {
+ error = vn_cross_single_mount(vp, lkflags, &unlocked, vpp);
+
+ /* Optimize for the non-mount-point case. */
+ if (__predict_true(error == EJUSTRETURN)) {
+ /*
+ * No more mounts to cross: 'vp' is the final vnode. If it
+ * had to be unlocked while trying to busy a mount, relock
+ * it and recheck that it was not doomed in the meantime.
+ */
+ *vpp = vp;
+ error = 0;
+
+ if (__predict_false(unlocked)) {
+ vn_lock_enforced_flags(vp, lkflags);
+ if (VN_IS_DOOMED(vp)) {
+ vput(vp);
+
+ *vpp = NULL;
+ error = ENOENT;
+ }
+ }
+
+ return (error);
+ }
+
+ if (__predict_false(error != 0)) {
+ if (__predict_true(unlocked))
+ vrele(vp);
+ else
+ vput(vp);
+
+ return (error);
+ }
+
+ /*
+ * Crossed one mount. Try to cross another one. The covered
+ * vnode is already unlocked, so only its reference is dropped
+ * here; traversal continues from the root vnode just obtained,
+ * which is expected to be locked.
+ */
+ MPASS(unlocked);
+ ASSERT_VOP_UNLOCKED(vp, __func__);
+ vrele(vp);
+ vp = *vpp;
+ ASSERT_VOP_LOCKED(vp, __func__);
+ }
+
+ __assert_unreachable();
+}
+
struct nameicap_tracker {
struct vnode *dp;
TAILQ_ENTRY(nameicap_tracker) nm_link;
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -826,6 +826,12 @@
int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
struct uio *uio);
+/* Mount-crossing helpers (see their definitions in kern/vfs_lookup.c). */
+int	vn_busy_mountedhere(struct vnode *vp, bool *unlocked,
+	    struct mount **mp);
+int	vn_cross_single_mount(struct vnode *vp, int root_lkflags,
+	    bool *unlocked, struct vnode **vpp);
+int	vn_cross_mounts(struct vnode *vp, int lkflags, struct vnode **vpp);
+
void vn_seqc_write_begin_locked(struct vnode *vp);
void vn_seqc_write_begin(struct vnode *vp);
void vn_seqc_write_end_locked(struct vnode *vp);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 16, 5:21 PM (22 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14661981
Default Alt Text
D40850.diff (8 KB)
Attached To
Mode
D40850: VFS lookup: New vn_cross_single_mount() and vn_cross_mounts()
Attached
Detach File
Event Timeline
Log In to Comment