Page MenuHomeFreeBSD

D13267.diff
No OneTemporary

D13267.diff

Index: sys/kern/vfs_extattr.c
===================================================================
--- sys/kern/vfs_extattr.c
+++ sys/kern/vfs_extattr.c
@@ -49,6 +49,14 @@
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
+
/*
* Syscall to push extended attribute configuration information into the VFS.
* Accepts a path, which it converts to a mountpoint, as well as a command
@@ -147,45 +155,177 @@
return (error);
}
-/*-
- * Set a named extended attribute on a file or directory
- *
- * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
- * kernelspace string pointer "attrname", userspace buffer
- * pointer "data", buffer length "nbytes", thread "td".
- * Returns: 0 on success, an error number otherwise
- * Locks: none
- * References: vp must be a valid reference for the duration of the call
- */
+static bool
+do_vn_extattr_io_fault(struct vnode *vp, struct uio *uio, struct thread *td)
+{
+ struct mount *mp;
+ int res, error;
+ size_t size = sizeof(res); /* in: capacity of res; out: bytes stored */
+
+ /*
+ * If we cannot get the sysctl value, assume that io_fault is enabled.
+ */
+ error = kernel_sysctlbyname(td, "debug.vn_io_fault_enable", &res, &size,
+ 0, 0, 0, 0);
+ if (error)
+ res = 1;
+ /* True only for: user buffer, VREG vnode, MNTK_NO_IOPF mount, res != 0. */
+ return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG &&
+ (mp = vp->v_mount) != NULL &&
+ (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && res);
+}
+
+struct vn_extattr_io_fault_args {
+ struct ucred *cred; /* callers store td->td_ucred here */
+ struct vop_args_tag {
+ struct vnode *vp; /* vnode handed to the extattr helper */
+ int attrnamespace; /* forwarded unchanged to the helper */
+ char attrname[EXTATTR_MAXNAMELEN]; /* not used for listextattr_t */
+ enum extattr_dio_t { /* selects the helper that doio dispatches to */
+ listextattr_t,
+ getextattr_t,
+ setextattr_t
+ } doio;
+ } vop_args;
+};
+
static int
-extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
- void *data, size_t nbytes, struct thread *td)
+vn_extattr_io_fault_touch(char *base, const struct uio *uio)
+{
+ int r;
+ /* Fetch one byte at base; for UIO_READ also store that byte back. */
+ r = fubyte(base);
+ if (r == -1 || (uio->uio_rw == UIO_READ && subyte(base, r) == -1))
+ return (EFAULT); /* the fetch or the store-back reported -1 */
+ return (0);
+}
+
+static int
+vn_extattr_io_fault_prefault_user(const struct uio *uio)
+{
+ char *base;
+ const struct iovec *iov;
+ size_t len;
+ ssize_t resid;
+ int error, i;
+ /* Touch the user memory described by uio; return the first touch error. */
+ KASSERT(uio->uio_segflg == UIO_USERSPACE,
+ ("vn_io_fault_prefault_user userspace"));
+ /* base/len track the untouched part of iovec i; resid bounds the walk. */
+ error = i = 0;
+ iov = uio->uio_iov;
+ resid = uio->uio_resid;
+ base = iov->iov_base;
+ len = iov->iov_len;
+ while (resid > 0) {
+ error = vn_extattr_io_fault_touch(base, uio);
+ if (error != 0)
+ break;
+ if (len < PAGE_SIZE) { /* less than a page left in this iovec */
+ if (len != 0) {
+ error = vn_extattr_io_fault_touch(
+ base + len - 1, uio); /* last byte of the iovec */
+ if (error != 0)
+ break;
+ resid -= len;
+ }
+ if (++i >= uio->uio_iovcnt)
+ break; /* no further iovecs to visit */
+ iov = uio->uio_iov + i;
+ base = iov->iov_base;
+ len = iov->iov_len;
+ } else { /* a full page or more remains: advance by PAGE_SIZE */
+ len -= PAGE_SIZE;
+ base += PAGE_SIZE;
+ resid -= PAGE_SIZE;
+ }
+ }
+ return (error);
+}
+
+static int
+extattr_list_vp_helper(struct vnode *vp, int attrnamespace, struct uio *uio,
+ size_t *sizep, struct thread *td)
+{
+ ssize_t cnt; /* uio_resid before the VOP; set and read only if uio != NULL */
+ int error;
+ /* Lock vp shared, run the MAC check and VOP_LISTEXTATTR, then unlock. */
+ if (uio != NULL)
+ cnt = uio->uio_resid;
+
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+
+#ifdef MAC
+ error = mac_vnode_check_listextattr(td->td_ucred, vp, attrnamespace);
+ if (error)
+ goto done; /* td_retval[0] is not written on this path */
+#endif
+
+ error = VOP_LISTEXTATTR(vp, attrnamespace, uio, sizep,
+ td->td_ucred, td);
+ /* td_retval[0] is stored whether or not the VOP returned an error. */
+ if (uio != NULL) {
+ cnt -= uio->uio_resid; /* bytes the VOP consumed from uio */
+ td->td_retval[0] = cnt;
+ } else
+ td->td_retval[0] = *sizep; /* sizep is dereferenced only if uio == NULL */
+#ifdef MAC
+done:
+#endif
+ VOP_UNLOCK(vp, 0);
+ return (error);
+}
+
+static int
+extattr_get_vp_helper(struct vnode *vp, int attrnamespace, const char *attrname,
+ struct uio *uio, size_t *sizep, struct thread *td)
+{
+ ssize_t cnt; /* uio_resid before the VOP; set and read only if uio != NULL */
+ int error;
+ /* Lock vp shared, run the MAC check and VOP_GETEXTATTR, then unlock. */
+ if (uio != NULL)
+ cnt = uio->uio_resid;
+
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+
+#ifdef MAC
+ error = mac_vnode_check_getextattr(td->td_ucred, vp, attrnamespace,
+ attrname);
+ if (error)
+ goto done; /* td_retval[0] is not written on this path */
+#endif
+
+ error = VOP_GETEXTATTR(vp, attrnamespace, attrname, uio, sizep,
+ td->td_ucred, td);
+ /* td_retval[0] is stored whether or not the VOP returned an error. */
+ if (uio != NULL) {
+ cnt -= uio->uio_resid; /* bytes the VOP consumed from uio */
+ td->td_retval[0] = cnt;
+ } else
+ td->td_retval[0] = *sizep; /* sizep is dereferenced only if uio == NULL */
+#ifdef MAC
+done:
+#endif
+ VOP_UNLOCK(vp, 0);
+ return (error);
+}
+
+
+static int
+extattr_set_vp_helper(struct vnode *vp, int attrnamespace, const char *attrname,
+ struct uio *uio, struct thread *td)
{
struct mount *mp;
- struct uio auio;
- struct iovec aiov;
ssize_t cnt;
int error;
- if (nbytes > IOSIZE_MAX)
- return (EINVAL);
+ cnt = uio->uio_resid;
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- aiov.iov_base = data;
- aiov.iov_len = nbytes;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = 0;
- auio.uio_resid = nbytes;
- auio.uio_rw = UIO_WRITE;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_td = td;
- cnt = nbytes;
-
#ifdef MAC
error = mac_vnode_check_setextattr(td->td_ucred, vp, attrnamespace,
attrname);
@@ -193,9 +333,9 @@
goto done;
#endif
- error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
+ error = VOP_SETEXTATTR(vp, attrnamespace, attrname, uio,
td->td_ucred, td);
- cnt -= auio.uio_resid;
+ cnt -= uio->uio_resid;
td->td_retval[0] = cnt;
#ifdef MAC
@@ -206,6 +346,217 @@
return (error);
}
+static int
+vn_extattr_io_fault_doio(struct vn_extattr_io_fault_args *args, struct uio *uio,
+ struct thread *td)
+{
+ size_t size; /* backs the sizep argument of the list and get helpers */
+ /* Dispatch on args->vop_args.doio to the matching extattr helper. */
+ KASSERT(uio != NULL, ("vn_extattr_io_fault_doio bad uio"));
+
+ switch (args->vop_args.doio) {
+ case listextattr_t:
+ size = uio->uio_resid;
+ return (extattr_list_vp_helper(args->vop_args.vp,
+ args->vop_args.attrnamespace, uio, &size, td));
+
+ case getextattr_t:
+ size = uio->uio_resid;
+ return (extattr_get_vp_helper(args->vop_args.vp,
+ args->vop_args.attrnamespace, args->vop_args.attrname, uio,
+ &size, td));
+
+ case setextattr_t:
+ return (extattr_set_vp_helper(args->vop_args.vp,
+ args->vop_args.attrnamespace, args->vop_args.attrname,
+ uio, td));
+
+ default:
+ panic("vn_extattr_io_fault_doio: unknown kind of io %d %d",
+ args->vop_args.doio, uio->uio_rw);
+ }
+ /* Each named case above returns; the default case calls panic(). */
+ return (0);
+}
+
+static const int io_hold_cnt = 16; /* vn_extattr_io_fault1() caps a chunk at io_hold_cnt * PAGE_SIZE */
+
+static int
+vn_extattr_io_fault1(struct vnode *vp, struct uio *uio, struct vn_extattr_io_fault_args *args,
+ struct thread *td)
+{
+ vm_page_t ma[io_hold_cnt + 2];
+ struct uio *uio_clone, short_uio;
+ struct iovec short_iovec[1];
+ vm_page_t *prev_td_ma;
+ vm_prot_t prot;
+ vm_offset_t addr, end;
+ size_t len, resid, size;
+ ssize_t adv;
+ int do_io_prefault, error, cnt, save, saveheld, prev_td_ma_cnt;
+
+ /*
+ * Run the extattr I/O with page faults disabled; on EFAULT, redo the
+ * remainder in chunks backed by held user pages.  It is impossible to
+ * operate with extattrs with non-zero offset.
+ */
+ KASSERT(uio->uio_offset == 0,
+ ("vn_extattr_io_fault1 bad uio_offset"));
+
+ /* Disable prefaulting if we can not get the sysctl value. */
+ size = sizeof(do_io_prefault); /* oldlenp must carry the buffer size in */
+ error = kernel_sysctlbyname(td,
+ "debug.vn_io_fault_prefault", &do_io_prefault, &size, 0, 0, 0, 0);
+ if (error)
+ do_io_prefault = 0;
+
+ if (do_io_prefault) {
+ error = vn_extattr_io_fault_prefault_user(uio);
+ if (error != 0)
+ return (error); /* Or ignore ? */
+ }
+
+ prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ;
+
+ /*
+ * The UFS follows IO_UNIT directive and replays back both
+ * uio_offset and uio_resid if an error is encountered during the
+ * operation. But, since the iovec may be already advanced,
+ * uio is still in an inconsistent state.
+ *
+ * Cache a copy of the original uio, which is advanced to the redo
+ * point using UIO_NOCOPY below.
+ */
+ uio_clone = cloneuio(uio);
+ resid = uio->uio_resid;
+
+ short_uio.uio_segflg = UIO_USERSPACE;
+ short_uio.uio_rw = uio->uio_rw;
+ short_uio.uio_td = uio->uio_td;
+
+ save = vm_fault_disable_pagefaults();
+ error = vn_extattr_io_fault_doio(args, uio, td);
+ if (error != EFAULT)
+ goto out; /* success or a non-fault error: no chunked retry */
+
+ /* Count iofaults here. */
+ uio_clone->uio_segflg = UIO_NOCOPY;
+ uiomove(NULL, resid - uio->uio_resid, uio_clone);
+ uio_clone->uio_segflg = uio->uio_segflg;
+
+ saveheld = curthread_pflags_set(TDP_UIOHELD);
+ prev_td_ma = td->td_ma;
+ prev_td_ma_cnt = td->td_ma_cnt;
+
+ while (uio_clone->uio_resid != 0) {
+ len = uio_clone->uio_iov->iov_len;
+ if (len == 0) {
+ KASSERT(uio_clone->uio_iovcnt >= 1,
+ ("iovcnt underflow"));
+ uio_clone->uio_iov++;
+ uio_clone->uio_iovcnt--;
+ continue;
+ }
+ if (len > io_hold_cnt * PAGE_SIZE)
+ len = io_hold_cnt * PAGE_SIZE;
+ addr = (uintptr_t)uio_clone->uio_iov->iov_base;
+ end = round_page(addr + len);
+ if (end < addr) { /* address arithmetic wrapped around */
+ error = EFAULT;
+ break;
+ }
+ /*
+ * A perfectly misaligned address and length could cause
+ * both the start and the end of the chunk to use partial
+ * page. +2 accounts for such a situation.
+ */
+ cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map,
+ addr, len, prot, ma, io_hold_cnt + 2);
+ if (cnt == -1) {
+ error = EFAULT;
+ break;
+ }
+ short_uio.uio_iov = &short_iovec[0];
+ short_iovec[0].iov_base = (void *)addr;
+ short_uio.uio_iovcnt = 1;
+ short_uio.uio_resid = short_iovec[0].iov_len = len;
+ short_uio.uio_offset = uio_clone->uio_offset;
+ td->td_ma = ma;
+ td->td_ma_cnt = cnt;
+ /* NOTE(review): each chunk is a separate helper and VOP call -- confirm. */
+ error = vn_extattr_io_fault_doio(args, &short_uio, td);
+ vm_page_unhold_pages(ma, cnt);
+ adv = len - short_uio.uio_resid; /* bytes this chunk transferred */
+
+ uio_clone->uio_iov->iov_base =
+ (char *)uio_clone->uio_iov->iov_base + adv;
+ uio_clone->uio_iov->iov_len -= adv;
+ uio_clone->uio_resid -= adv;
+ uio_clone->uio_offset += adv;
+
+ uio->uio_resid -= adv;
+ uio->uio_offset += adv;
+
+ if (error != 0 || adv == 0)
+ break;
+ }
+ td->td_ma = prev_td_ma;
+ td->td_ma_cnt = prev_td_ma_cnt;
+ curthread_pflags_restore(saveheld);
+out:
+ vm_fault_enable_pagefaults(save);
+ free(uio_clone, M_IOV);
+ return (error);
+}
+
+/*-
+ * Set a named extended attribute on a file or directory
+ *
+ * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
+ * kernelspace string pointer "attrname", userspace buffer
+ * pointer "data", buffer length "nbytes", thread "td".
+ * Returns: 0 on success, an error number otherwise
+ * Locks: none
+ * References: vp must be a valid reference for the duration of the call
+ */
+static int
+extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
+ void *data, size_t nbytes, struct thread *td)
+{
+ struct uio uio;
+ struct iovec aiov;
+ void *rl_cookie;
+ struct vn_extattr_io_fault_args args;
+ int error;
+
+ if (nbytes > IOSIZE_MAX)
+ return (EINVAL);
+ /* Describe the user buffer as a single-iovec UIO_WRITE request. */
+ aiov.iov_base = data;
+ aiov.iov_len = nbytes;
+ uio.uio_iov = &aiov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = nbytes;
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_segflg = UIO_USERSPACE;
+ uio.uio_td = td;
+ /* io_fault path: bundle the arguments and run under a write range lock. */
+ if (do_vn_extattr_io_fault(vp, &uio, td)) {
+ args.cred = td->td_ucred;
+ args.vop_args.vp = vp;
+ args.vop_args.attrnamespace = attrnamespace;
+ memcpy(args.vop_args.attrname, attrname, EXTATTR_MAXNAMELEN); /* fixed-size copy */
+ args.vop_args.doio = setextattr_t;
+ rl_cookie = vn_rangelock_wlock(vp, 0, uio.uio_resid);
+ error = vn_extattr_io_fault1(vp, &uio, &args, td);
+ vn_rangelock_unlock(vp, rl_cookie);
+ return (error);
+ }
+ /* Otherwise hand the uio straight to the set helper. */
+ return (extattr_set_vp_helper(vp, attrnamespace, attrname, &uio, td));
+}
+
int
sys_extattr_set_fd(td, uap)
struct thread *td;
@@ -327,15 +678,14 @@
{
struct uio auio, *auiop;
struct iovec aiov;
- ssize_t cnt;
size_t size, *sizep;
+ void *rl_cookie;
+ struct vn_extattr_io_fault_args args;
int error;
if (nbytes > IOSIZE_MAX)
return (EINVAL);
- vn_lock(vp, LK_SHARED | LK_RETRY);
-
/*
* Slightly unusual semantics: if the user provides a NULL data
* pointer, they don't want to receive the data, just the maximum
@@ -343,7 +693,6 @@
*/
auiop = NULL;
sizep = NULL;
- cnt = 0;
if (data != NULL) {
aiov.iov_base = data;
aiov.iov_len = nbytes;
@@ -355,30 +704,22 @@
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auiop = &auio;
- cnt = nbytes;
+ if (do_vn_extattr_io_fault(vp, auiop, td)) {
+ args.cred = td->td_ucred;
+ args.vop_args.vp = vp;
+ args.vop_args.attrnamespace = attrnamespace;
+ memcpy(args.vop_args.attrname, attrname, EXTATTR_MAXNAMELEN);
+ args.vop_args.doio = getextattr_t;
+ rl_cookie = vn_rangelock_rlock(vp, 0, auiop->uio_resid);
+ error = vn_extattr_io_fault1(vp, auiop, &args, td);
+ vn_rangelock_unlock(vp, rl_cookie);
+ return (error);
+ }
} else
sizep = &size;
-#ifdef MAC
- error = mac_vnode_check_getextattr(td->td_ucred, vp, attrnamespace,
- attrname);
- if (error)
- goto done;
-#endif
-
- error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
- td->td_ucred, td);
-
- if (auiop != NULL) {
- cnt -= auio.uio_resid;
- td->td_retval[0] = cnt;
- } else
- td->td_retval[0] = size;
-#ifdef MAC
-done:
-#endif
- VOP_UNLOCK(vp, 0);
- return (error);
+ return (extattr_get_vp_helper(vp, attrnamespace, attrname, auiop,
+ sizep, td));
}
int
@@ -636,17 +977,15 @@
struct uio auio, *auiop;
size_t size, *sizep;
struct iovec aiov;
- ssize_t cnt;
+ void *rl_cookie;
+ struct vn_extattr_io_fault_args args;
int error;
if (nbytes > IOSIZE_MAX)
return (EINVAL);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
auiop = NULL;
sizep = NULL;
- cnt = 0;
if (data != NULL) {
aiov.iov_base = data;
aiov.iov_len = nbytes;
@@ -658,29 +997,20 @@
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auiop = &auio;
- cnt = nbytes;
+ if (do_vn_extattr_io_fault(vp, auiop, td)) {
+ args.cred = td->td_ucred;
+ args.vop_args.vp = vp;
+ args.vop_args.attrnamespace = attrnamespace;
+ args.vop_args.doio = listextattr_t;
+ rl_cookie = vn_rangelock_rlock(vp, 0, auiop->uio_resid);
+ error = vn_extattr_io_fault1(vp, auiop, &args, td);
+ vn_rangelock_unlock(vp, rl_cookie);
+ return (error);
+ }
} else
sizep = &size;
-#ifdef MAC
- error = mac_vnode_check_listextattr(td->td_ucred, vp, attrnamespace);
- if (error)
- goto done;
-#endif
-
- error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
- td->td_ucred, td);
-
- if (auiop != NULL) {
- cnt -= auio.uio_resid;
- td->td_retval[0] = cnt;
- } else
- td->td_retval[0] = size;
-#ifdef MAC
-done:
-#endif
- VOP_UNLOCK(vp, 0);
- return (error);
+ return (extattr_list_vp_helper(vp, attrnamespace, auiop, sizep, td));
}

File Metadata

Mime Type
text/plain
Expires
Mon, Feb 3, 10:57 PM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16443833
Default Alt Text
D13267.diff (14 KB)

Event Timeline