Page MenuHomeFreeBSD

D46296.diff
No OneTemporary

D46296.diff

diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3561,6 +3561,7 @@
fs/fuse/fuse_node.c optional fusefs
fs/fuse/fuse_vfsops.c optional fusefs
fs/fuse/fuse_vnops.c optional fusefs
+fs/fuse/virtiofs_vfsops.c optional virtiofs
fs/mntfs/mntfs_vnops.c standard
fs/msdosfs/msdosfs_conv.c optional msdosfs
fs/msdosfs/msdosfs_denode.c optional msdosfs
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -982,7 +982,8 @@
if ((err = tick->tk_aw_ohead.error)) {
goto out;
}
- if ((err = fticket_pull(tick, uio))) {
+
+ if (!fsess_get_virtiofs(data) && (err = fticket_pull(tick, uio))) {
goto out;
}
fiio = fticket_resp(tick)->base;
@@ -1001,7 +1002,8 @@
}
if (fuse_libabi_geq(data, 7, 5)) {
- if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
+ if (fsess_get_virtiofs(data) ||
+ fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
data->max_write = fiio->max_write;
if (fiio->flags & FUSE_ASYNC_READ)
diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h
--- a/sys/fs/fuse/fuse_ipc.h
+++ b/sys/fs/fuse/fuse_ipc.h
@@ -65,6 +65,7 @@
#include <sys/param.h>
#include <sys/refcount.h>
+#include <sys/taskqueue.h>
enum fuse_data_cache_mode {
FUSE_CACHE_UC,
@@ -83,6 +84,7 @@
void fiov_teardown(struct fuse_iov *fiov);
void fiov_refresh(struct fuse_iov *fiov);
void fiov_adjust(struct fuse_iov *fiov, size_t size);
+int fiov_adjust_nowait(struct fuse_iov *fiov, size_t size);
#define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do { \
fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt))); \
@@ -130,6 +132,8 @@
struct mtx tk_aw_mtx;
fuse_handler_t *tk_aw_handler;
TAILQ_ENTRY(fuse_ticket) tk_aw_link;
+
+ struct task tk_vtfs_tk;
};
#define FT_ANSW 0x01 /* request of ticket has already been answered */
@@ -168,6 +172,8 @@
}
int fticket_pull(struct fuse_ticket *ftick, struct uio *uio);
+size_t fticket_out_size(struct fuse_ticket *ftick);
+int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
/*
* The data representing a FUSE session.
@@ -219,6 +225,13 @@
uint64_t isimpl;
uint64_t mnt_flag;
enum fuse_data_cache_mode cache_mode;
+
+ /* Fields necessary for virtiofs. */
+ struct vtfs_softc *vtfs;
+ struct taskqueue *vtfs_tq;
+ void (*vtfs_flush_cb)(void *, int);
+ void (*virtiofs_unmount_cb)(void *);
+
};
#define FSESS_DEAD 0x0001 /* session is to be closed */
@@ -240,6 +253,7 @@
#define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000 /* WB cache incoherent */
#define FSESS_WARN_ILLEGAL_INODE 0x800000 /* Illegal inode for new file */
#define FSESS_WARN_READLINK_EMBEDDED_NUL 0x1000000 /* corrupt READLINK output */
+#define FSESS_VIRTIOFS 0x2000000 /* session backed by virtio device */
#define FSESS_MNTOPTS_MASK ( \
FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \
FSESS_DEFAULT_PERMISSIONS | FSESS_INTR)
@@ -414,6 +428,12 @@
return (data->dataflags & FSESS_DEAD);
}
+/* Report whether FSESS_VIRTIOFS is set in the session's dataflags. */
+static inline bool
+fsess_get_virtiofs(struct fuse_data *data)
+{
+	return ((data->dataflags & FSESS_VIRTIOFS) != 0);
+}
+
struct fuse_dispatcher {
struct fuse_ticket *tick;
struct fuse_in_header *finh;
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -76,6 +76,7 @@
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/sdt.h>
+#include <sys/sglist.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
@@ -87,6 +88,8 @@
#include "fuse_ipc.h"
#include "fuse_internal.h"
+#include <dev/virtio/fs/virtio_fs.h>
+
SDT_PROVIDER_DECLARE(fusefs);
/*
* Fuse trace probe:
@@ -107,8 +110,6 @@
fticket_aw_pull_uio(struct fuse_ticket *ftick,
struct uio *uio);
-static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen);
-
static fuse_handler_t fuse_standard_handler;
static counter_u64_t fuse_ticket_count;
@@ -276,17 +277,19 @@
free(fiov->base, M_FUSEMSG);
}
-void
-fiov_adjust(struct fuse_iov *fiov, size_t size)
+static int
+fiov_adjust_internal(struct fuse_iov *fiov, size_t size, int flag)
{
+ KASSERT(flag == M_NOWAIT || flag == M_WAITOK, ("invalid flag %x", flag));
+
if (fiov->allocated_size < size ||
(fuse_iov_permanent_bufsize >= 0 &&
fiov->allocated_size - size > fuse_iov_permanent_bufsize &&
--fiov->credit < 0)) {
fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG,
- M_WAITOK | M_ZERO);
+ flag | M_ZERO);
if (!fiov->base) {
- panic("FUSE: realloc failed");
+ return (ENOMEM);
}
fiov->allocated_size = FU_AT_LEAST(size);
fiov->credit = fuse_iov_credit;
@@ -297,6 +300,26 @@
bzero((char*)fiov->base + fiov->len, size - fiov->len);
}
fiov->len = size;
+
+ return(0);
+}
+
+/*
+ * Resize the iov using an M_NOWAIT allocation.
+ *
+ * Returns ENOMEM if the iov has no buffer after the attempt, 0 otherwise.
+ */
+int
+fiov_adjust_nowait(struct fuse_iov *fiov, size_t size)
+{
+	/* The outcome is judged by the state of the buffer pointer. */
+	(void)fiov_adjust_internal(fiov, size, M_NOWAIT);
+
+	return (fiov->base == NULL ? ENOMEM : 0);
+}
+
+/*
+ * Resize the iov using an M_WAITOK allocation.  Panics if the iov has no
+ * buffer afterwards.
+ */
+void
+fiov_adjust(struct fuse_iov *fiov, size_t size)
+{
+	(void)fiov_adjust_internal(fiov, size, M_WAITOK);
+
+	if (fiov->base == NULL) {
+		panic("FUSE: realloc failed");
+	}
+}
/* Resize the fiov if needed, and clear it's buffer */
@@ -646,6 +669,25 @@
fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx);
}
+
+/*
+ * Notify users of the FUSE device of the new ticket.
+ *
+ * Wakes one thread sleeping on the session pointer, then wakes select/poll
+ * waiters and posts a knote on the session's read selinfo.  The only caller
+ * in this file, fuse_insert_message(), invokes this with data->ms_mtx held.
+ */
+static void
+fuse_notify_fuse_device(struct fuse_data *data)
+{
+ wakeup_one(data);
+ selwakeuppri(&data->ks_rsel, PZERO + 1);
+ /* NOTE(review): presumably relies on the caller holding ms_mtx - confirm. */
+ KNOTE_LOCKED(&data->ks_rsel.si_note, 0);
+}
+
+/*
+ * virtiofs counterpart of the device notification: initialize the task
+ * embedded in the ticket with the session's flush callback and enqueue it
+ * on the session's taskqueue.  The callback receives the session, not the
+ * ticket, as its argument.
+ */
+static void
+fuse_notify_virtiofs(struct fuse_ticket *ftick)
+{
+	struct fuse_data *sess;
+
+	sess = ftick->tk_data;
+
+	TASK_INIT(&ftick->tk_vtfs_tk, 0, sess->vtfs_flush_cb, sess);
+	taskqueue_enqueue(sess->vtfs_tq, &ftick->tk_vtfs_tk);
+}
+
/*
* Insert a new upgoing ticket into the message queue
*
@@ -655,26 +697,165 @@
void
fuse_insert_message(struct fuse_ticket *ftick, bool urgent)
{
+ struct fuse_data *data = ftick->tk_data;
+
if (ftick->tk_flag & FT_DIRTY) {
panic("FUSE: ticket reused without being refreshed");
}
ftick->tk_flag |= FT_DIRTY;
- if (fdata_get_dead(ftick->tk_data)) {
+ if (fdata_get_dead(data)) {
return;
}
- fuse_lck_mtx_lock(ftick->tk_data->ms_mtx);
+
+ fuse_lck_mtx_lock(data->ms_mtx);
+
if (urgent)
fuse_ms_push_head(ftick);
else
fuse_ms_push(ftick);
- wakeup_one(ftick->tk_data);
- selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
- KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
- fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
+
+ /* Choose between the virtiofs and FUSE paths. */
+ if (fsess_get_virtiofs(data))
+ fuse_notify_virtiofs(ftick);
+ else
+ fuse_notify_fuse_device(data);
+
+ fuse_lck_mtx_unlock(data->ms_mtx);
}
-static int
+/*
+ * Reply size for read-style requests: the size field of the fuse_read_in
+ * that immediately follows the fuse_in_header in the outgoing message.
+ */
+static size_t
+fticket_out_size_read(struct fuse_ticket *ftick)
+{
+	const char *msg;
+	const struct fuse_read_in *fri;
+
+	msg = ftick->tk_ms_fiov.base;
+	fri = (const struct fuse_read_in *)(msg + sizeof(struct fuse_in_header));
+
+	return (fri->size);
+}
+
+/*
+ * Return the number of reply-body bytes to reserve for a ticket, chosen by
+ * the opcode of its request.  Where the protocol version changes the reply
+ * layout, the compat size is returned for older servers.  An opcode that is
+ * not listed here panics.
+ */
+size_t
+fticket_out_size(struct fuse_ticket *ftick)
+{
+	enum fuse_opcode opcode = fticket_opcode(ftick);
+
+	switch (opcode) {
+	/* Requests whose reply is sized as having no body. */
+	case FUSE_FORGET:
+	case FUSE_UNLINK:
+	case FUSE_RMDIR:
+	case FUSE_RENAME:
+	case FUSE_RELEASE:
+	case FUSE_FSYNC:
+	case FUSE_SETXATTR:
+	case FUSE_REMOVEXATTR:
+	case FUSE_FLUSH:
+	case FUSE_RELEASEDIR:
+	case FUSE_FSYNCDIR:
+	case FUSE_SETLK:
+	case FUSE_SETLKW:
+	case FUSE_ACCESS:
+	case FUSE_INTERRUPT:
+	case FUSE_DESTROY:
+	case FUSE_FALLOCATE:
+		return (0);
+
+	/* Requests answered with an entry; layout changed in 7.9. */
+	case FUSE_LINK:
+	case FUSE_LOOKUP:
+	case FUSE_MKDIR:
+	case FUSE_MKNOD:
+	case FUSE_SYMLINK:
+		if (!fuse_libabi_geq(ftick->tk_data, 7, 9))
+			return (FUSE_COMPAT_ENTRY_OUT_SIZE);
+		return (sizeof(struct fuse_entry_out));
+
+	/* Requests answered with attributes; layout changed in 7.9. */
+	case FUSE_GETATTR:
+	case FUSE_SETATTR:
+		if (!fuse_libabi_geq(ftick->tk_data, 7, 9))
+			return (FUSE_COMPAT_ATTR_OUT_SIZE);
+		return (sizeof(struct fuse_attr_out));
+
+	/* An entry followed by an open reply. */
+	case FUSE_CREATE:
+		if (!fuse_libabi_geq(ftick->tk_data, 7, 9))
+			return (FUSE_COMPAT_ENTRY_OUT_SIZE +
+			    sizeof(struct fuse_open_out));
+		return (sizeof(struct fuse_entry_out) +
+		    sizeof(struct fuse_open_out));
+
+	case FUSE_STATFS:
+		if (!fuse_libabi_geq(ftick->tk_data, 7, 4))
+			return (FUSE_COMPAT_STATFS_SIZE);
+		return (sizeof(struct fuse_statfs_out));
+
+	/* The reply size is whatever the request asked for. */
+	case FUSE_READ:
+	case FUSE_READDIR:
+		return (fticket_out_size_read(ftick));
+
+	/* We are expecting to read back a POSIX path. */
+	case FUSE_READLINK:
+		return (PATH_MAX);
+
+	case FUSE_OPEN:
+	case FUSE_OPENDIR:
+		return (sizeof(struct fuse_open_out));
+
+	case FUSE_WRITE:
+	case FUSE_COPY_FILE_RANGE:
+		return (sizeof(struct fuse_write_out));
+
+	/*
+	 * NOTE(review): only the fixed-size reply header is reserved for the
+	 * xattr queries - confirm this is enough when attribute data is
+	 * requested.
+	 */
+	case FUSE_GETXATTR:
+		return (sizeof(struct fuse_getxattr_out));
+	case FUSE_LISTXATTR:
+		return (sizeof(struct fuse_listxattr_out));
+
+	case FUSE_BMAP:
+		return (sizeof(struct fuse_bmap_out));
+	case FUSE_INIT:
+		return (sizeof(struct fuse_init_out));
+	case FUSE_GETLK:
+		return (sizeof(struct fuse_lk_out));
+	case FUSE_LSEEK:
+		return (sizeof(struct fuse_lseek_out));
+
+	default:
+		panic("FUSE: opcodes out of sync (%d)\n", opcode);
+	}
+}
+
+int
fuse_body_audit(struct fuse_ticket *ftick, size_t blen)
{
int err = 0;
@@ -887,7 +1068,9 @@
{
int err = 0;
- err = fticket_pull(ftick, uio);
+ /* Data already pulled for virtiofs. */
+ if (uio != NULL)
+ err = fticket_pull(ftick, uio);
fuse_lck_mtx_lock(ftick->tk_aw_mtx);
diff --git a/sys/fs/fuse/fuse_vfsops.h b/sys/fs/fuse/fuse_vfsops.h
new file mode 100644
--- /dev/null
+++ b/sys/fs/fuse/fuse_vfsops.h
@@ -0,0 +1,13 @@
+/*
+ * Prototypes for the fusefs VFS operations, exported so that the virtiofs
+ * vfsops table can reuse them, plus the virtiofs teardown hook.
+ *
+ * The vfs_*_t function types are not declared by this header, so they must
+ * already be in scope where it is included (presumably via <sys/mount.h> -
+ * confirm).
+ */
+#ifndef _FUSE_VFSOPS_H_
+#define _FUSE_VFSOPS_H_
+
+vfs_fhtovp_t fuse_vfsop_fhtovp;
+vfs_mount_t fuse_vfsop_mount;
+vfs_unmount_t fuse_vfsop_unmount;
+vfs_root_t fuse_vfsop_root;
+vfs_statfs_t fuse_vfsop_statfs;
+vfs_vget_t fuse_vfsop_vget;
+
+/* Takes the session (struct fuse_data *) as an opaque pointer. */
+void virtiofs_teardown(void *arg);
+
+#endif /* _FUSE_VFSOPS_H_ */
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -81,15 +81,19 @@
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/fcntl.h>
+#include <sys/sglist.h>
#include "fuse.h"
#include "fuse_node.h"
#include "fuse_ipc.h"
#include "fuse_internal.h"
+#include "fuse_vfsops.h"
#include <sys/priv.h>
#include <security/mac/mac_framework.h>
+#include <dev/virtio/fs/virtio_fs.h>
+
SDT_PROVIDER_DECLARE(fusefs);
/*
* Fuse trace probe:
@@ -109,13 +113,6 @@
#define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER
#endif
-static vfs_fhtovp_t fuse_vfsop_fhtovp;
-static vfs_mount_t fuse_vfsop_mount;
-static vfs_unmount_t fuse_vfsop_unmount;
-static vfs_root_t fuse_vfsop_root;
-static vfs_statfs_t fuse_vfsop_statfs;
-static vfs_vget_t fuse_vfsop_vget;
-
struct vfsops fuse_vfsops = {
.vfs_fhtovp = fuse_vfsop_fhtovp,
.vfs_mount = fuse_vfsop_mount,
@@ -262,7 +259,7 @@
return err;
}
-static int
+int
fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags,
struct vnode **vpp)
{
@@ -290,7 +287,7 @@
return (0);
}
-static int
+int
fuse_vfsop_mount(struct mount *mp)
{
int err;
@@ -466,7 +463,7 @@
return err;
}
-static int
+int
fuse_vfsop_unmount(struct mount *mp, int mntflags)
{
int err = 0;
@@ -509,7 +506,11 @@
fdisp_destroy(&fdi);
}
- fdata_set_dead(data);
+
+ if (fsess_get_virtiofs(data))
+ data->virtiofs_unmount_cb((void *)data);
+ else
+ fdata_set_dead(data);
alreadydead:
FUSE_LOCK();
@@ -522,14 +523,15 @@
mp->mnt_data = NULL;
MNT_IUNLOCK(mp);
- dev_rel(fdev);
+ if (fdev != NULL)
+ dev_rel(fdev);
return 0;
}
SDT_PROBE_DEFINE1(fusefs, , vfsops, invalidate_without_export,
"struct mount*");
-static int
+int
fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{
struct fuse_data *data = fuse_get_mpdata(mp);
@@ -595,7 +597,7 @@
return error;
}
-static int
+int
fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp)
{
struct fuse_data *data = fuse_get_mpdata(mp);
@@ -631,7 +633,7 @@
return err;
}
-static int
+int
fuse_vfsop_statfs(struct mount *mp, struct statfs *sbp)
{
struct fuse_dispatcher fdi;
diff --git a/sys/fs/fuse/virtiofs_vfsops.c b/sys/fs/fuse/virtiofs_vfsops.c
new file mode 100644
--- /dev/null
+++ b/sys/fs/fuse/virtiofs_vfsops.c
@@ -0,0 +1,514 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024, Emil Tsalapatis <emil@etsalapatis.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/lock.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/sglist.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include "fuse.h"
+#include "fuse_kernel.h"
+#include "fuse_internal.h"
+#include "fuse_ipc.h"
+#include "fuse_vfsops.h"
+
+#include <dev/virtio/fs/virtio_fs.h>
+
+#include <compat/linux/linux_errno.h>
+#include <compat/linux/linux_errno.inc>
+
+#define VIRTIOFS_THREADS_TQ (8)
+
+static vfs_mount_t virtiofs_vfsop_mount;
+
+/*
+ * virtiofs reuses the fusefs VFS operations; only the mount entry point is
+ * specific to virtiofs.
+ */
+static struct vfsops virtiofs_vfsops = {
+	.vfs_mount =	virtiofs_vfsop_mount,
+	.vfs_unmount =	fuse_vfsop_unmount,
+	.vfs_root =	fuse_vfsop_root,
+	.vfs_statfs =	fuse_vfsop_statfs,
+	.vfs_vget =	fuse_vfsop_vget,
+	.vfs_fhtovp =	fuse_vfsop_fhtovp,
+};
+
+/* Filesystem description passed to vfs_modevent() by virtiofs_loader(). */
+static struct vfsconf virtiofs_vfsconf = {
+	.vfc_name =	"virtiofs",
+	.vfc_version =	VFS_VERSION,
+	.vfc_vfsops =	&virtiofs_vfsops,
+	.vfc_typenum =	-1,
+	.vfc_flags =	VFCF_JAIL | VFCF_SYNTHETIC,
+};
+
+/*
+ * Module event handler.  Load and unload events are forwarded to
+ * vfs_modevent() together with the virtiofs vfsconf; every other event is
+ * rejected with EINVAL.
+ */
+static int
+virtiofs_loader(struct module *m, int what, void *arg)
+{
+	if (what != MOD_LOAD && what != MOD_UNLOAD)
+		return (EINVAL);
+
+	return (vfs_modevent(NULL, what, &virtiofs_vfsconf));
+}
+
+/*
+ * Registering the module: "virtiofs" is handled by virtiofs_loader(), which
+ * is given the virtiofs vfsconf as its extra argument.
+ */
+
+static moduledata_t virtiofs_moddata = {
+ "virtiofs",
+ virtiofs_loader,
+ &virtiofs_vfsconf
+};
+
+DECLARE_MODULE(virtiofs, virtiofs_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE);
+/* Both the fusefs module and the vtfs module are required, at version 1. */
+MODULE_DEPEND(virtiofs, fusefs, 1, 1, 1);
+MODULE_DEPEND(virtiofs, vtfs, 1, 1, 1);
+MODULE_VERSION(virtiofs, 1);
+
+/*
+ * Push the ticket to the virtiofs device.
+ *
+ * Takes a reference on the ticket, sizes the response buffer for the
+ * request's opcode, and builds one scatter-gather list holding the request
+ * followed by the out header and the response buffer.  FUSE_FORGET tickets
+ * are submitted as urgent.
+ *
+ * Returns 0 or an errno.  If a step before the submission fails, the
+ * reference taken here is dropped and the list (if any) is freed.
+ */
+static int
+virtiofs_enqueue(struct fuse_ticket *ftick)
+{
+	struct fuse_out_header *ohead;
+	struct fuse_iov *req, *resp;
+	struct sglist *sg;
+	int nreadable, nwritable;
+	bool urgent;
+	int error;
+
+	ohead = &ftick->tk_aw_ohead;
+	req = &ftick->tk_ms_fiov;
+	resp = &ftick->tk_aw_fiov;
+	sg = NULL;
+
+	refcount_acquire(&ftick->tk_refcount);
+
+	/* Preallocate the response buffer. */
+	error = fiov_adjust_nowait(resp, fticket_out_size(ftick));
+	if (error != 0)
+		goto fail;
+
+	/*
+	 * Segment counts are named from the host's point of view: it reads
+	 * the request and writes the out header plus the response body.
+	 */
+	nreadable = sglist_count(req->base, req->len);
+	nwritable = sglist_count(ohead, sizeof(*ohead)) +
+	    sglist_count(resp->base, resp->len);
+
+	sg = sglist_alloc(nreadable + nwritable, M_NOWAIT);
+	if (sg == NULL) {
+		error = ENOMEM;
+		goto fail;
+	}
+
+	error = sglist_append(sg, req->base, req->len);
+	if (error == 0)
+		error = sglist_append(sg, ohead, sizeof(*ohead));
+	if (error == 0)
+		error = sglist_append(sg, resp->base, resp->len);
+	if (error != 0)
+		goto fail;
+
+	/*
+	 * The enqueue call destroys the scatter-gather array both on success
+	 * and on failure, so there is nothing to clean up afterwards.
+	 */
+	urgent = (fticket_opcode(ftick) == FUSE_FORGET);
+	return (vtfs_enqueue(ftick->tk_data->vtfs, ftick, sg, nreadable,
+	    nwritable, urgent));
+
+fail:
+	fuse_ticket_drop(ftick);
+	if (sg != NULL)
+		sglist_free(sg);
+
+	return (error);
+}
+
+/*
+ * Taskqueue handler that drains the session's outgoing message queue into
+ * the virtio device.
+ *
+ * xdata is the session (struct fuse_data *); pending is unused.  Tickets
+ * are dequeued one at a time under ms_mtx and submitted with the mutex
+ * released.  The loop stops at the first submission error, which is
+ * reported with a warning; tickets still queued at that point stay queued.
+ */
+static void
+virtiofs_flush(void *xdata, int __unused pending)
+{
+	struct fuse_data *data = xdata;
+	struct fuse_ticket *ftick;
+	/* Must start at 0: the queue may already be empty when we run. */
+	int error = 0;
+
+	fuse_lck_mtx_lock(data->ms_mtx);
+
+	while ((ftick = STAILQ_FIRST(&data->ms_head)) != NULL) {
+		STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link);
+		data->ms_count--;
+
+		KASSERT(ftick != STAILQ_FIRST(&data->ms_head),
+		    ("ticket still in the queue"));
+
+#ifdef INVARIANTS
+		MPASS(data->ms_count >= 0);
+		ftick->tk_ms_link.stqe_next = NULL;
+#endif
+
+		FUSE_ASSERT_MS_DONE(ftick);
+
+		/*
+		 * The enqueue operation is synchronous and may sleep,
+		 * so drop the session lock - we have already adjusted
+		 * all session fields so we don't need it while flushing
+		 * to the virtio device anyway.
+		 */
+		fuse_lck_mtx_unlock(data->ms_mtx);
+		error = virtiofs_enqueue(ftick);
+
+		/*
+		 * Release the reference for the dequeued ticket (presumably
+		 * the one taken when it was queued) only once the enqueue
+		 * has finished using it, so that it cannot be freed while
+		 * still being submitted.
+		 */
+		fuse_ticket_drop(ftick);
+
+		fuse_lck_mtx_lock(data->ms_mtx);
+		if (error != 0)
+			break;
+	}
+
+	fuse_lck_mtx_unlock(data->ms_mtx);
+
+	if (error != 0)
+		printf("Warning: %s failed with %d\n", __func__, error);
+}
+
+/*
+ * Callback registered with the virtio device via vtfs_register_cb() at
+ * mount time.  Intentionally empty: neither the ticket nor the length is
+ * used.
+ */
+static void
+virtiofs_cb_forget_ticket(void *xtick, uint32_t len __unused)
+{
+}
+
+/*
+ * Remove from the answer queue, and drop, the first ticket whose unique id
+ * equals ftick->irq_unique, then clear ftick->irq_unique.  The only caller
+ * in this file invokes this with data->aw_mtx held.
+ */
+static void
+virtiofs_drop_intr_tick(struct fuse_data *data, struct fuse_ticket *ftick)
+{
+	struct fuse_ticket *cand, *next;
+
+	TAILQ_FOREACH_SAFE(cand, &data->aw_head, tk_aw_link, next) {
+		if (cand->tk_unique != ftick->irq_unique)
+			continue;
+
+		fuse_aw_remove(cand);
+		fuse_ticket_drop(cand);
+		break;
+	}
+
+	ftick->irq_unique = 0;
+}
+
+/*
+ * Handle a message from the host that does not answer any request.
+ *
+ * oerror is the error field of the out header and is used as the message
+ * type.  Only FUSE_NOTIFY_INVAL_ENTRY and FUSE_NOTIFY_INVAL_INODE are
+ * handled; any other type yields ENOSYS.
+ *
+ * Returns 0 or an errno; failures are also logged.
+ */
+static int
+virtiofs_handle_async_tick(struct fuse_data *data, struct fuse_ticket *ftick, int oerror)
+{
+	struct mount *mp = data->mp;
+	struct iovec aiov;
+	struct uio uio;
+	int err;
+
+	/*
+	 * Form a uio and pass it to the message handlers, because unlike other
+	 * messages they do not use ftick->tk_aw_fiov to store the message body.
+	 */
+	aiov.iov_base = fticket_resp(ftick)->base;
+	aiov.iov_len = fticket_resp(ftick)->len;
+
+	uio.uio_iov = &aiov;
+	uio.uio_iovcnt = 1;
+	uio.uio_resid = aiov.iov_len;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_td = curthread;
+	uio.uio_offset = 0;
+
+	/* Only handle the two async messages that the FUSE device does. */
+	switch (oerror) {
+	case FUSE_NOTIFY_INVAL_ENTRY:
+		err = fuse_internal_invalidate_entry(mp, &uio);
+		break;
+	case FUSE_NOTIFY_INVAL_INODE:
+		err = fuse_internal_invalidate_inode(mp, &uio);
+		break;
+	default:
+		err = ENOSYS;
+		break;
+	}
+
+	if (err != 0) {
+		/* Report the type we dispatched on, not the ticket's opcode. */
+		printf("WARNING: error %d when handling async message of type %d\n",
+		    err, oerror);
+	}
+
+	return (err);
+}
+
+/*
+ * Look up, in the answer queue, the ticket whose unique id matches the one
+ * in ftick's out header and remove it.  Must be called with aw_mtx held.
+ *
+ * Returns true if a ticket was removed, false if no ticket matched.
+ */
+static bool
+virtiofs_remove_ticket(struct fuse_data *data, struct fuse_ticket *ftick)
+{
+	struct fuse_ticket *cur, *next;
+
+	mtx_assert(&data->aw_mtx, MA_OWNED);
+
+	TAILQ_FOREACH_SAFE(cur, &data->aw_head, tk_aw_link, next) {
+		if (cur->tk_unique == ftick->tk_aw_ohead.unique) {
+			/* The match is expected to be the ticket itself. */
+			MPASS(cur == ftick);
+			fuse_aw_remove(ftick);
+
+			return (true);
+		}
+	}
+
+	return (false);
+}
+
+/*
+ * Completion callback for a ticket returned by the virtio device.
+ *
+ * xtick is the ticket that was submitted; len is the length reported for
+ * the reply.  The out header is validated, Linux errnos are translated when
+ * the session asks for it, the ticket is removed from the answer queue and
+ * its handler is run.  A reply with unique == 0 that matches no queued
+ * ticket is treated as an asynchronous notification.
+ *
+ * Any failure marks the session dead.
+ */
+static void
+virtiofs_cb_complete_ticket(void *xtick, uint32_t len)
+{
+	struct fuse_ticket *ftick = xtick;
+	struct fuse_data *data = ftick->tk_data;
+	struct fuse_out_header *ohead = &ftick->tk_aw_ohead;
+	bool found = false;
+	int lerrno;
+	int err = 0;
+
+	/* Validate the length field of the out header. */
+	if (len != ohead->len) {
+		err = EINVAL;
+		goto done;
+	}
+
+	/* Error responses to tickets do not have a body. */
+	if (len > sizeof(*ohead) && ohead->unique != 0 && ohead->error) {
+		err = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Ensure that out headers that return an error are valid.  The table
+	 * index lives in its own variable so that a successful translation
+	 * does not leave a stale non-zero value in err.
+	 */
+	if (data->linux_errnos != 0 && ohead->error != 0) {
+		lerrno = -ohead->error;
+		if (lerrno < 0 || lerrno >= nitems(linux_to_bsd_errtbl)) {
+			err = EINVAL;
+			goto done;
+		}
+
+		/* '-', because it will get flipped again below */
+		ohead->error = -linux_to_bsd_errtbl[lerrno];
+	}
+
+	/* Remove the ticket from the answer queue. */
+	fuse_lck_mtx_lock(data->aw_mtx);
+
+	found = virtiofs_remove_ticket(data, ftick);
+
+	/*
+	 * We should not be able to find a non-unique ticket, and
+	 * all unique tickets should still be in the queue.
+	 */
+	KASSERT(found == (ohead->unique != 0),
+	    ("inconsistency in answer queue: found %d unique %ju",
+	    found, (uintmax_t)ohead->unique));
+
+	/* Drop any pending interrupts for the completed ticket. */
+	if (found && ftick->irq_unique > 0)
+		virtiofs_drop_intr_tick(data, ftick);
+
+	fuse_lck_mtx_unlock(data->aw_mtx);
+
+	if (found) {
+		if (ftick->tk_aw_handler) {
+			/* Sanitize the linuxism of negative errnos */
+			ohead->error *= -1;
+
+			/* Illegal error code, treat it as EIO. */
+			if (ohead->error < 0 || ohead->error > ELAST) {
+				ohead->error = EIO;
+				ftick->tk_aw_handler(ftick, NULL);
+				err = EINVAL;
+			} else {
+				err = ftick->tk_aw_handler(ftick, NULL);
+			}
+		}
+	} else if (ohead->unique == 0) {
+		err = virtiofs_handle_async_tick(data, ftick, ohead->error);
+	}
+
+	/* If the operation was successful, ensure the size is valid. */
+	if (ohead->error == 0 && ohead->unique != 0)
+		err = fuse_body_audit(ftick, len - sizeof(*ohead));
+
+	/*
+	 * Drop the ticket only after the last access to it above; ohead
+	 * points into the ticket.
+	 */
+	if (found)
+		fuse_ticket_drop(ftick);
+
+done:
+	/*
+	 * If something goes wrong, err on the side of caution and kill the session
+	 * because the FUSE server in the host is misbehaving.
+	 */
+	if (err != 0)
+		fdata_set_dead(data);
+}
+
+/*
+ * Mount a virtiofs filesystem.
+ *
+ * Requires the "fspath" and "tag" mount options; "max_read=" is optional and
+ * defaults to maxbcachebuf.  Updates of an existing mount are rejected with
+ * EOPNOTSUPP.  The device is looked up by tag, a new FUSE session is
+ * allocated and wired to it, a taskqueue with VIRTIOFS_THREADS_TQ threads is
+ * started, and the FUSE_INIT handshake is sent.
+ *
+ * Returns 0, or an errno if an option is missing or the device lookup fails.
+ * Failure to create or start the taskqueue panics.
+ */
+static int
+virtiofs_vfsop_mount(struct mount *mp)
+{
+ /*
+  * Only the virtiofs marker is set here.
+  * NOTE(review): an earlier comment said interrupts are turned on by
+  * default, but FSESS_INTR is not part of this mask - confirm the intent.
+  */
+ const uint64_t mntopts = FSESS_VIRTIOFS;
+ struct thread *td = curthread;
+ struct vfsoptlist *opts;
+ struct fuse_data *data;
+ vtfs_instance vtfs;
+ uint32_t max_read;
+ char *tag;
+ int error;
+
+ opts = mp->mnt_optnew;
+ if (opts == NULL)
+ return (EINVAL);
+
+ /* `fspath' contains the mount point (eg. /mnt/guestfs); REQUIRED */
+ if (!vfs_getopts(opts, "fspath", &error))
+ return (error);
+
+ /* Optional; the default is kept when the option is absent. */
+ max_read = maxbcachebuf;
+ (void)vfs_scanopt(opts, "max_read=", "%u", &max_read);
+
+
+ /* XXX Remounts are not handled for now, but should be easy to add. */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ /* `tag' contains the virtio tag; REQUIRED */
+ tag = vfs_getopts(opts, "tag", &error);
+ if (!tag)
+ return (error);
+
+ error = vtfs_find(tag, &vtfs);
+ if (error != 0)
+ return (error);
+
+ data = fdata_alloc(NULL, td->td_ucred);
+
+ vtfs_register_cb(vtfs, virtiofs_cb_forget_ticket, virtiofs_cb_complete_ticket,
+ virtiofs_teardown, data);
+
+ FUSE_LOCK();
+ KASSERT(!fdata_get_dead(data), ("allocated dead session"));
+
+ /*
+  * NOTE(review): an M_NOWAIT allocation failure is turned into a panic
+  * here - confirm whether failing the mount would be preferable.
+  */
+ data->vtfs_tq = taskqueue_create("virtiofstq", M_NOWAIT, taskqueue_thread_enqueue,
+ &data->vtfs_tq);
+ if (data->vtfs_tq == NULL)
+ panic("ENOMEM when initializing taskqueue");
+
+ data->vtfs = vtfs;
+ data->vtfs_flush_cb = virtiofs_flush;
+ data->virtiofs_unmount_cb = virtiofs_teardown;
+ data->mp = mp;
+ /*
+ * XXX We currently do not support any mount options. This is because they
+ * are hard to test, even though most FUSE options should be trivially easy
+ * to add. Deliberately defer enabling them until we can reuse the FUSE test
+ * suite for virtiofs.
+ */
+ data->dataflags |= mntopts;
+ data->max_read = max_read;
+ data->daemon_timeout = FUSE_MIN_DAEMON_TIMEOUT;
+ data->linux_errnos = 1;
+ data->mnt_flag = mp->mnt_flag & MNT_UPDATEMASK;
+ FUSE_UNLOCK();
+
+ KASSERT(!fdata_get_dead(data), ("newly created fuse session is dead"));
+
+ vfs_getnewfsid(mp);
+ MNT_ILOCK(mp);
+ mp->mnt_data = data;
+ mp->mnt_flag &= ~MNT_LOCAL;
+ mp->mnt_kern_flag |= MNTK_USES_BCACHE;
+ /*
+ * The FS is remote by default. Disable nullfs caching to avoid
+ * the extra coherence cost, same as FUSE.
+ */
+ mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
+ MNT_IUNLOCK(mp);
+
+ mp->mnt_stat.f_iosize = maxbcachebuf;
+ /*
+  * NOTE(review): this appends to whatever f_fstypename already holds,
+  * bounded by MFSNAMELEN - confirm the resulting name is the intended one.
+  */
+ strlcat(mp->mnt_stat.f_fstypename, ".virtiofs", MFSNAMELEN);
+ memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
+ strlcpy(mp->mnt_stat.f_mntfromname, tag, MNAMELEN);
+ mp->mnt_iosize_max = maxphys;
+
+ error = taskqueue_start_threads(&data->vtfs_tq, VIRTIOFS_THREADS_TQ, PVFS, "virtiofs_tq");
+ if (error != 0)
+ panic("error when initializing taskqueue threads");
+
+ /* Start the FUSE_INIT handshake with the server. */
+ fuse_internal_send_init(data, td);
+
+ return (0);
+}
+
+/*
+ * Tear down the virtiofs side of a session.  xdata is the session
+ * (struct fuse_data *).  The steps run strictly in this order: kill the
+ * session, drain and free the taskqueue, drain the device, then unregister
+ * the callbacks and release the device.
+ */
+void
+virtiofs_teardown(void *xdata)
+{
+	struct fuse_data *sess = xdata;
+	vtfs_instance dev = sess->vtfs;
+
+	/* A dead session accepts no new requests. */
+	fdata_set_dead(sess);
+
+	/*
+	 * Let every pending flush task push its requests into the virtio
+	 * device, so that no host-bound request remains in flight, and then
+	 * dispose of the taskqueue.
+	 */
+	taskqueue_drain_all(sess->vtfs_tq);
+	taskqueue_free(sess->vtfs_tq);
+
+	/*
+	 * Turn off the device and handle everything it has already
+	 * returned; after this no guest-bound request remains in flight.
+	 */
+	vtfs_drain(dev);
+
+	vtfs_unregister_cb(dev);
+	vtfs_release(dev);
+}
+
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -404,6 +404,7 @@
${_vesa} \
${_vf_i2c} \
virtio \
+ virtiofs \
vge \
${_viawd} \
videomode \
diff --git a/sys/modules/virtiofs/Makefile b/sys/modules/virtiofs/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/virtiofs/Makefile
@@ -0,0 +1,6 @@
+.PATH: ${SRCTOP}/sys/fs/fuse
+
+KMOD=	virtiofs
+# virtiofs_vfsops.c includes <sys/vnode.h>, which needs the generated
+# vnode_if.h; listing it in SRCS makes the module build generate it.
+SRCS=	vnode_if.h virtiofs_vfsops.c
+
+.include <bsd.kmod.mk>

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 9, 5:03 AM (2 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15729751
Default Alt Text
D46296.diff (26 KB)

Event Timeline