Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F109277152
D13267.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D13267.diff
View Options
Index: sys/kern/vfs_extattr.c
===================================================================
--- sys/kern/vfs_extattr.c
+++ sys/kern/vfs_extattr.c
@@ -49,6 +49,14 @@
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
+
/*
* Syscall to push extended attribute configuration information into the VFS.
* Accepts a path, which it converts to a mountpoint, as well as a command
@@ -147,45 +155,177 @@
return (error);
}
-/*-
- * Set a named extended attribute on a file or directory
- *
- * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
- * kernelspace string pointer "attrname", userspace buffer
- * pointer "data", buffer length "nbytes", thread "td".
- * Returns: 0 on success, an error number otherwise
- * Locks: none
- * References: vp must be a valid reference for the duration of the call
- */
+/*
+ * Decide whether an extended attribute transfer has to go through
+ * vn_extattr_io_fault1().
+ *
+ * Returns true only when the uio describes a userspace buffer, the vnode
+ * is a regular file that lives on a mount point with MNTK_NO_IOPF set, and
+ * the debug.vn_io_fault_enable sysctl is non-zero (or cannot be read).
+ */
+static bool
+do_vn_extattr_io_fault(struct vnode *vp, struct uio *uio, struct thread *td)
+{
+	struct mount *mp;
+	size_t size;
+	int res, error;
+
+	/*
+	 * kernel_sysctlbyname() reads the capacity of the output buffer
+	 * from *oldlenp, so it has to be initialized before the call.
+	 * If the sysctl value cannot be fetched, assume that io_fault is
+	 * enabled.
+	 */
+	size = sizeof(res);
+	error = kernel_sysctlbyname(td, "debug.vn_io_fault_enable", &res, &size,
+	    NULL, 0, NULL, 0);
+	if (error)
+		res = 1;
+
+	return (uio->uio_segflg == UIO_USERSPACE && vp->v_type == VREG &&
+	    (mp = vp->v_mount) != NULL &&
+	    (mp->mnt_kern_flag & MNTK_NO_IOPF) != 0 && res);
+}
+
+/* Selects which extended attribute operation a request stands for. */
+enum extattr_dio_t {
+	listextattr_t,
+	getextattr_t,
+	setextattr_t
+};
+
+/* Arguments handed to the helper chosen by "doio". */
+struct vop_args_tag {
+	struct vnode		*vp;
+	int			attrnamespace;
+	char			attrname[EXTATTR_MAXNAMELEN];
+	enum extattr_dio_t	doio;
+};
+
+/* Request descriptor passed through the io_fault retry machinery. */
+struct vn_extattr_io_fault_args {
+	struct ucred		*cred;
+	struct vop_args_tag	vop_args;
+};
+
+/*
+ * Touch one byte of a user buffer: read it with fubyte() and, when the
+ * transfer direction is UIO_READ, store the same value back with subyte().
+ * Returns EFAULT if either access fails, 0 otherwise.
+ */
static int
-extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
- void *data, size_t nbytes, struct thread *td)
+vn_extattr_io_fault_touch(char *base, const struct uio *uio)
+{
+	int byte;
+
+	byte = fubyte(base);
+	if (byte == -1)
+		return (EFAULT);
+	if (uio->uio_rw == UIO_READ && subyte(base, byte) == -1)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Touch the user buffers described by "uio" so that they are faulted in
+ * before the real transfer is attempted.
+ *
+ * Each iovec is walked in PAGE_SIZE steps; the first byte of every step
+ * and the last byte of the final, shorter-than-a-page remainder are
+ * touched with vn_extattr_io_fault_touch().  The uio itself is not
+ * modified.
+ *
+ * Returns 0 on success or EFAULT as soon as one touch fails.
+ */
+static int
+vn_extattr_io_fault_prefault_user(const struct uio *uio)
+{
+	char *base;
+	const struct iovec *iov;
+	size_t len;
+	ssize_t resid;
+	int error, i;
+
+	KASSERT(uio->uio_segflg == UIO_USERSPACE,
+	    ("vn_extattr_io_fault_prefault_user userspace"));
+
+	error = i = 0;
+	iov = uio->uio_iov;
+	resid = uio->uio_resid;
+	base = iov->iov_base;
+	len = iov->iov_len;
+	while (resid > 0) {
+		error = vn_extattr_io_fault_touch(base, uio);
+		if (error != 0)
+			break;
+		if (len < PAGE_SIZE) {
+			/*
+			 * Tail of the current iovec: touch its last byte,
+			 * then move on to the next iovec, if there is one.
+			 */
+			if (len != 0) {
+				error = vn_extattr_io_fault_touch(
+				    base + len - 1, uio);
+				if (error != 0)
+					break;
+				resid -= len;
+			}
+			if (++i >= uio->uio_iovcnt)
+				break;
+			iov = uio->uio_iov + i;
+			base = iov->iov_base;
+			len = iov->iov_len;
+		} else {
+			/* At least one more page left in this iovec. */
+			len -= PAGE_SIZE;
+			base += PAGE_SIZE;
+			resid -= PAGE_SIZE;
+		}
+	}
+	return (error);
+}
+
+/*
+ * List the extended attributes of "vp" in namespace "attrnamespace".
+ *
+ * The vnode is locked shared around the optional MAC check and the
+ * VOP_LISTEXTATTR() call.  When "uio" is supplied, td_retval[0] receives
+ * the number of bytes consumed from it; otherwise it receives *sizep.
+ * Returns 0 on success, an error number otherwise.
+ */
+static int
+extattr_list_vp_helper(struct vnode *vp, int attrnamespace, struct uio *uio,
+    size_t *sizep, struct thread *td)
+{
+	ssize_t before;
+	int error;
+
+	/* Remember the requested size to compute the transferred amount. */
+	if (uio != NULL)
+		before = uio->uio_resid;
+
+	vn_lock(vp, LK_SHARED | LK_RETRY);
+
+#ifdef MAC
+	error = mac_vnode_check_listextattr(td->td_ucred, vp, attrnamespace);
+	if (error) {
+		VOP_UNLOCK(vp, 0);
+		return (error);
+	}
+#endif
+
+	error = VOP_LISTEXTATTR(vp, attrnamespace, uio, sizep,
+	    td->td_ucred, td);
+
+	if (uio == NULL)
+		td->td_retval[0] = *sizep;
+	else {
+		before -= uio->uio_resid;
+		td->td_retval[0] = before;
+	}
+
+	VOP_UNLOCK(vp, 0);
+	return (error);
+}
+
+/*
+ * Fetch the extended attribute "attrname" of "vp" from namespace
+ * "attrnamespace".
+ *
+ * The vnode is locked shared around the optional MAC check and the
+ * VOP_GETEXTATTR() call.  When "uio" is supplied, td_retval[0] receives
+ * the number of bytes consumed from it; otherwise it receives *sizep.
+ * Returns 0 on success, an error number otherwise.
+ */
+static int
+extattr_get_vp_helper(struct vnode *vp, int attrnamespace, const char *attrname,
+    struct uio *uio, size_t *sizep, struct thread *td)
+{
+	ssize_t before;
+	int error;
+
+	/* Remember the requested size to compute the transferred amount. */
+	if (uio != NULL)
+		before = uio->uio_resid;
+
+	vn_lock(vp, LK_SHARED | LK_RETRY);
+
+#ifdef MAC
+	error = mac_vnode_check_getextattr(td->td_ucred, vp, attrnamespace,
+	    attrname);
+	if (error) {
+		VOP_UNLOCK(vp, 0);
+		return (error);
+	}
+#endif
+
+	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, uio, sizep,
+	    td->td_ucred, td);
+
+	if (uio == NULL)
+		td->td_retval[0] = *sizep;
+	else {
+		before -= uio->uio_resid;
+		td->td_retval[0] = before;
+	}
+
+	VOP_UNLOCK(vp, 0);
+	return (error);
+}
+
+
+static int
+extattr_set_vp_helper(struct vnode *vp, int attrnamespace, const char *attrname,
+ struct uio *uio, struct thread *td)
{
struct mount *mp;
- struct uio auio;
- struct iovec aiov;
ssize_t cnt;
int error;
- if (nbytes > IOSIZE_MAX)
- return (EINVAL);
+ cnt = uio->uio_resid;
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- aiov.iov_base = data;
- aiov.iov_len = nbytes;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = 0;
- auio.uio_resid = nbytes;
- auio.uio_rw = UIO_WRITE;
- auio.uio_segflg = UIO_USERSPACE;
- auio.uio_td = td;
- cnt = nbytes;
-
#ifdef MAC
error = mac_vnode_check_setextattr(td->td_ucred, vp, attrnamespace,
attrname);
@@ -193,9 +333,9 @@
goto done;
#endif
- error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
+ error = VOP_SETEXTATTR(vp, attrnamespace, attrname, uio,
td->td_ucred, td);
- cnt -= auio.uio_resid;
+ cnt -= uio->uio_resid;
td->td_retval[0] = cnt;
#ifdef MAC
@@ -206,6 +346,217 @@
return (error);
}
+/*
+ * Dispatch one extended attribute transfer to the list, get or set helper
+ * selected by args->vop_args.doio, using "uio" as the data buffer.
+ * Returns whatever the chosen helper returns; an unknown selector panics.
+ */
+static int
+vn_extattr_io_fault_doio(struct vn_extattr_io_fault_args *args, struct uio *uio,
+    struct thread *td)
+{
+	struct vop_args_tag *va;
+	size_t size;
+	int error;
+
+	KASSERT(uio != NULL, ("vn_extattr_io_fault_doio bad uio"));
+
+	va = &args->vop_args;
+	error = 0;
+
+	switch (va->doio) {
+	case listextattr_t:
+		size = uio->uio_resid;
+		error = extattr_list_vp_helper(va->vp, va->attrnamespace,
+		    uio, &size, td);
+		break;
+
+	case getextattr_t:
+		size = uio->uio_resid;
+		error = extattr_get_vp_helper(va->vp, va->attrnamespace,
+		    va->attrname, uio, &size, td);
+		break;
+
+	case setextattr_t:
+		error = extattr_set_vp_helper(va->vp, va->attrnamespace,
+		    va->attrname, uio, td);
+		break;
+
+	default:
+		panic("vn_extattr_io_fault_doio: unknown kind of io %d %d",
+		    args->vop_args.doio, uio->uio_rw);
+	}
+
+	return (error);
+}
+
+/*
+ * Upper bound, in pages, of one chunk processed by the retry loop in
+ * vn_extattr_io_fault1().
+ */
+static const int io_hold_cnt = 16;
+
+/*
+ * Perform the extended attribute transfer described by "args" on the
+ * userspace buffer "uio" with page faults disabled.
+ *
+ * The operation is first attempted on the whole uio.  If that attempt
+ * fails with EFAULT, the remaining part of the buffer is processed in
+ * chunks of at most io_hold_cnt pages; the pages of each chunk are held
+ * with vm_fault_quick_hold_pages() and published through td->td_ma for
+ * the duration of the call.
+ *
+ * Returns 0 on success, an error number otherwise.
+ */
+static int
+vn_extattr_io_fault1(struct vnode *vp, struct uio *uio, struct vn_extattr_io_fault_args *args,
+    struct thread *td)
+{
+	vm_page_t ma[io_hold_cnt + 2];
+	struct uio *uio_clone, short_uio;
+	struct iovec short_iovec[1];
+	vm_page_t *prev_td_ma;
+	vm_prot_t prot;
+	vm_offset_t addr, end;
+	size_t len, resid, oldlen;
+	ssize_t adv;
+	int do_io_prefault, error, cnt, save, saveheld, prev_td_ma_cnt;
+
+	/*
+	 * It is impossible to operate with extattrs with non-zero offset.
+	 */
+	KASSERT(uio->uio_offset == 0,
+	    ("vn_extattr_io_fault1 bad uio_offset"));
+
+	/*
+	 * kernel_sysctlbyname() reads the capacity of the output buffer
+	 * from *oldlenp, so it has to be initialized before the call.
+	 * Disable prefaulting if the sysctl value cannot be fetched.
+	 */
+	oldlen = sizeof(do_io_prefault);
+	error = kernel_sysctlbyname(td,
+	    "debug.vn_io_fault_prefault", &do_io_prefault, &oldlen,
+	    NULL, 0, NULL, 0);
+	if (error)
+		do_io_prefault = 0;
+
+	if (do_io_prefault) {
+		error = vn_extattr_io_fault_prefault_user(uio);
+		if (error != 0)
+			return (error); /* Or ignore ? */
+	}
+
+	/* A read into user memory needs the pages to be writable. */
+	prot = uio->uio_rw == UIO_READ ? VM_PROT_WRITE : VM_PROT_READ;
+
+	/*
+	 * The UFS follows IO_UNIT directive and replays back both
+	 * uio_offset and uio_resid if an error is encountered during the
+	 * operation.  But, since the iovec may be already advanced,
+	 * uio is still in an inconsistent state.
+	 *
+	 * Cache a copy of the original uio, which is advanced to the redo
+	 * point using UIO_NOCOPY below.
+	 */
+	uio_clone = cloneuio(uio);
+	resid = uio->uio_resid;
+
+	short_uio.uio_segflg = UIO_USERSPACE;
+	short_uio.uio_rw = uio->uio_rw;
+	short_uio.uio_td = uio->uio_td;
+
+	/* First attempt: the whole request, with page faults disabled. */
+	save = vm_fault_disable_pagefaults();
+	error = vn_extattr_io_fault_doio(args, uio, td);
+	if (error != EFAULT)
+		goto out;
+
+	/*
+	 * Count iofaults here.
+	 * Skip the clone over the bytes the first attempt already moved.
+	 */
+	uio_clone->uio_segflg = UIO_NOCOPY;
+	uiomove(NULL, resid - uio->uio_resid, uio_clone);
+	uio_clone->uio_segflg = uio->uio_segflg;
+
+	saveheld = curthread_pflags_set(TDP_UIOHELD);
+	prev_td_ma = td->td_ma;
+	prev_td_ma_cnt = td->td_ma_cnt;
+
+	/*
+	 * NOTE(review): every pass of this loop re-invokes the extattr
+	 * operation on one chunk, and short_uio.uio_offset becomes
+	 * non-zero after the first chunk, although the assertion at the
+	 * top states that extattrs cannot be operated on at a non-zero
+	 * offset.  Confirm that buffers needing more than one chunk are
+	 * handled as intended.
+	 */
+	while (uio_clone->uio_resid != 0) {
+		len = uio_clone->uio_iov->iov_len;
+		if (len == 0) {
+			/* Current iovec is exhausted, step to the next. */
+			KASSERT(uio_clone->uio_iovcnt >= 1,
+			    ("iovcnt underflow"));
+			uio_clone->uio_iov++;
+			uio_clone->uio_iovcnt--;
+			continue;
+		}
+		if (len > io_hold_cnt * PAGE_SIZE)
+			len = io_hold_cnt * PAGE_SIZE;
+		addr = (uintptr_t)uio_clone->uio_iov->iov_base;
+		end = round_page(addr + len);
+		if (end < addr) {
+			/* Address range wrapped around. */
+			error = EFAULT;
+			break;
+		}
+		/*
+		 * A perfectly misaligned address and length could cause
+		 * both the start and the end of the chunk to use partial
+		 * page.  +2 accounts for such a situation.
+		 */
+		cnt = vm_fault_quick_hold_pages(&td->td_proc->p_vmspace->vm_map,
+		    addr, len, prot, ma, io_hold_cnt + 2);
+		if (cnt == -1) {
+			error = EFAULT;
+			break;
+		}
+		short_uio.uio_iov = &short_iovec[0];
+		short_iovec[0].iov_base = (void *)addr;
+		short_uio.uio_iovcnt = 1;
+		short_uio.uio_resid = short_iovec[0].iov_len = len;
+		short_uio.uio_offset = uio_clone->uio_offset;
+		td->td_ma = ma;
+		td->td_ma_cnt = cnt;
+
+		error = vn_extattr_io_fault_doio(args, &short_uio, td);
+		vm_page_unhold_pages(ma, cnt);
+		adv = len - short_uio.uio_resid;
+
+		/* Advance the clone and the caller's uio by what was moved. */
+		uio_clone->uio_iov->iov_base =
+		    (char *)uio_clone->uio_iov->iov_base + adv;
+		uio_clone->uio_iov->iov_len -= adv;
+		uio_clone->uio_resid -= adv;
+		uio_clone->uio_offset += adv;
+
+		uio->uio_resid -= adv;
+		uio->uio_offset += adv;
+
+		if (error != 0 || adv == 0)
+			break;
+	}
+	td->td_ma = prev_td_ma;
+	td->td_ma_cnt = prev_td_ma_cnt;
+	curthread_pflags_restore(saveheld);
+out:
+	vm_fault_enable_pagefaults(save);
+	free(uio_clone, M_IOV);
+	return (error);
+}
+
+/*-
+ * Set a named extended attribute on a file or directory
+ *
+ * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
+ *            kernelspace string pointer "attrname", userspace buffer
+ *            pointer "data", buffer length "nbytes", thread "td".
+ * Returns: 0 on success, an error number otherwise (EINVAL when "nbytes"
+ *          exceeds IOSIZE_MAX, ENAMETOOLONG when "attrname" does not fit
+ *          into the io_fault argument block)
+ * Locks: none
+ * References: vp must be a valid reference for the duration of the call
+ */
+static int
+extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
+    void *data, size_t nbytes, struct thread *td)
+{
+	struct uio uio;
+	struct iovec aiov;
+	void *rl_cookie;
+	struct vn_extattr_io_fault_args args;
+	int error;
+
+	if (nbytes > IOSIZE_MAX)
+		return (EINVAL);
+
+	aiov.iov_base = data;
+	aiov.iov_len = nbytes;
+	uio.uio_iov = &aiov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = 0;
+	uio.uio_resid = nbytes;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_segflg = UIO_USERSPACE;
+	uio.uio_td = td;
+
+	if (do_vn_extattr_io_fault(vp, &uio, td)) {
+		args.cred = td->td_ucred;
+		args.vop_args.vp = vp;
+		args.vop_args.attrnamespace = attrnamespace;
+		/*
+		 * "attrname" is a NUL-terminated string of unknown buffer
+		 * size: copy it as a string instead of reading a fixed
+		 * EXTATTR_MAXNAMELEN bytes from it, and refuse names that
+		 * would be truncated.
+		 */
+		if (strlcpy(args.vop_args.attrname, attrname,
+		    sizeof(args.vop_args.attrname)) >=
+		    sizeof(args.vop_args.attrname))
+			return (ENAMETOOLONG);
+		args.vop_args.doio = setextattr_t;
+		/* Hold the range lock across the whole retry sequence. */
+		rl_cookie = vn_rangelock_wlock(vp, 0, uio.uio_resid);
+		error = vn_extattr_io_fault1(vp, &uio, &args, td);
+		vn_rangelock_unlock(vp, rl_cookie);
+		return (error);
+	}
+
+	return (extattr_set_vp_helper(vp, attrnamespace, attrname, &uio, td));
+}
+
int
sys_extattr_set_fd(td, uap)
struct thread *td;
@@ -327,15 +678,14 @@
{
struct uio auio, *auiop;
struct iovec aiov;
- ssize_t cnt;
size_t size, *sizep;
+ void *rl_cookie;
+ struct vn_extattr_io_fault_args args;
int error;
if (nbytes > IOSIZE_MAX)
return (EINVAL);
- vn_lock(vp, LK_SHARED | LK_RETRY);
-
/*
* Slightly unusual semantics: if the user provides a NULL data
* pointer, they don't want to receive the data, just the maximum
@@ -343,7 +693,6 @@
*/
auiop = NULL;
sizep = NULL;
- cnt = 0;
if (data != NULL) {
aiov.iov_base = data;
aiov.iov_len = nbytes;
@@ -355,30 +704,22 @@
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auiop = &auio;
- cnt = nbytes;
+ if (do_vn_extattr_io_fault(vp, auiop, td)) {
+ args.cred = td->td_ucred;
+ args.vop_args.vp = vp;
+ args.vop_args.attrnamespace = attrnamespace;
+ memcpy(args.vop_args.attrname, attrname, EXTATTR_MAXNAMELEN);
+ args.vop_args.doio = getextattr_t;
+ rl_cookie = vn_rangelock_rlock(vp, 0, auiop->uio_resid);
+ error = vn_extattr_io_fault1(vp, auiop, &args, td);
+ vn_rangelock_unlock(vp, rl_cookie);
+ return (error);
+ }
} else
sizep = &size;
-#ifdef MAC
- error = mac_vnode_check_getextattr(td->td_ucred, vp, attrnamespace,
- attrname);
- if (error)
- goto done;
-#endif
-
- error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
- td->td_ucred, td);
-
- if (auiop != NULL) {
- cnt -= auio.uio_resid;
- td->td_retval[0] = cnt;
- } else
- td->td_retval[0] = size;
-#ifdef MAC
-done:
-#endif
- VOP_UNLOCK(vp, 0);
- return (error);
+ return (extattr_get_vp_helper(vp, attrnamespace, attrname, auiop,
+ sizep, td));
}
int
@@ -636,17 +977,15 @@
struct uio auio, *auiop;
size_t size, *sizep;
struct iovec aiov;
- ssize_t cnt;
+ void *rl_cookie;
+ struct vn_extattr_io_fault_args args;
int error;
if (nbytes > IOSIZE_MAX)
return (EINVAL);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
auiop = NULL;
sizep = NULL;
- cnt = 0;
if (data != NULL) {
aiov.iov_base = data;
aiov.iov_len = nbytes;
@@ -658,29 +997,20 @@
auio.uio_segflg = UIO_USERSPACE;
auio.uio_td = td;
auiop = &auio;
- cnt = nbytes;
+ if (do_vn_extattr_io_fault(vp, auiop, td)) {
+ args.cred = td->td_ucred;
+ args.vop_args.vp = vp;
+ args.vop_args.attrnamespace = attrnamespace;
+ args.vop_args.doio = listextattr_t;
+ rl_cookie = vn_rangelock_rlock(vp, 0, auiop->uio_resid);
+ error = vn_extattr_io_fault1(vp, auiop, &args, td);
+ vn_rangelock_unlock(vp, rl_cookie);
+ return (error);
+ }
} else
sizep = &size;
-#ifdef MAC
- error = mac_vnode_check_listextattr(td->td_ucred, vp, attrnamespace);
- if (error)
- goto done;
-#endif
-
- error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
- td->td_ucred, td);
-
- if (auiop != NULL) {
- cnt -= auio.uio_resid;
- td->td_retval[0] = cnt;
- } else
- td->td_retval[0] = size;
-#ifdef MAC
-done:
-#endif
- VOP_UNLOCK(vp, 0);
- return (error);
+ return (extattr_list_vp_helper(vp, attrnamespace, auiop, sizep, td));
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Feb 3, 10:57 PM (20 h, 51 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16443833
Default Alt Text
D13267.diff (14 KB)
Attached To
Mode
D13267: Export extattr_*vp() functions to allow to use it in the linuxulator in future
Attached
Detach File
Event Timeline
Log In to Comment