Page MenuHomeFreeBSD

D26668.diff
No OneTemporary

D26668.diff

diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -659,6 +659,7 @@
#define AUE_SHMRENAME 43263 /* FreeBSD-specific. */
#define AUE_REALPATHAT 43264 /* FreeBSD-specific. */
#define AUE_CLOSERANGE 43265 /* FreeBSD-specific. */
+#define AUE_SPECIALFD 43266 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -1168,5 +1168,7 @@
; 576 is initialised by the krpc code, if present.
576 AUE_NULL NOSTD|NOPROTO { int rpctls_syscall(int op, \
const char *path); }
+577 AUE_SPECIALFD NOPROTO { int __specialfd(int type, const void *req, \
+ size_t len); }
; vim: syntax=off
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -51,9 +51,11 @@
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
+#include <sys/specialfd.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/timespec.h>
+#include <sys/eventfd.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
@@ -124,53 +126,11 @@
int error;
};
-/* eventfd */
-typedef uint64_t eventfd_t;
-
-static fo_rdwr_t eventfd_read;
-static fo_rdwr_t eventfd_write;
-static fo_ioctl_t eventfd_ioctl;
-static fo_poll_t eventfd_poll;
-static fo_kqfilter_t eventfd_kqfilter;
-static fo_stat_t eventfd_stat;
-static fo_close_t eventfd_close;
-static fo_fill_kinfo_t eventfd_fill_kinfo;
-
-static struct fileops eventfdops = {
- .fo_read = eventfd_read,
- .fo_write = eventfd_write,
- .fo_truncate = invfo_truncate,
- .fo_ioctl = eventfd_ioctl,
- .fo_poll = eventfd_poll,
- .fo_kqfilter = eventfd_kqfilter,
- .fo_stat = eventfd_stat,
- .fo_close = eventfd_close,
- .fo_chmod = invfo_chmod,
- .fo_chown = invfo_chown,
- .fo_sendfile = invfo_sendfile,
- .fo_fill_kinfo = eventfd_fill_kinfo,
- .fo_flags = DFLAG_PASSABLE
-};
-
-static void filt_eventfddetach(struct knote *kn);
-static int filt_eventfdread(struct knote *kn, long hint);
-static int filt_eventfdwrite(struct knote *kn, long hint);
-
-static struct filterops eventfd_rfiltops = {
- .f_isfd = 1,
- .f_detach = filt_eventfddetach,
- .f_event = filt_eventfdread
-};
-static struct filterops eventfd_wfiltops = {
- .f_isfd = 1,
- .f_detach = filt_eventfddetach,
- .f_event = filt_eventfdwrite
-};
-
/* timerfd */
typedef uint64_t timerfd_t;
static fo_rdwr_t timerfd_read;
+static fo_ioctl_t timerfd_ioctl;
static fo_poll_t timerfd_poll;
static fo_kqfilter_t timerfd_kqfilter;
static fo_stat_t timerfd_stat;
@@ -181,7 +141,7 @@
.fo_read = timerfd_read,
.fo_write = invfo_rdwr,
.fo_truncate = invfo_truncate,
- .fo_ioctl = eventfd_ioctl,
+ .fo_ioctl = timerfd_ioctl,
.fo_poll = timerfd_poll,
.fo_kqfilter = timerfd_kqfilter,
.fo_stat = timerfd_stat,
@@ -202,13 +162,6 @@
.f_event = filt_timerfdread
};
-struct eventfd {
- eventfd_t efd_count;
- uint32_t efd_flags;
- struct selinfo efd_sel;
- struct mtx efd_lock;
-};
-
struct timerfd {
clockid_t tfd_clockid;
struct itimerspec tfd_time;
@@ -219,7 +172,6 @@
struct mtx tfd_lock;
};
-static int eventfd_create(struct thread *td, uint32_t initval, int flags);
static void linux_timerfd_expire(void *);
static void linux_timerfd_curval(struct timerfd *, struct itimerspec *);
@@ -691,294 +643,39 @@
return (error1 == 0 ? 0 : error2);
}
-static int
-eventfd_create(struct thread *td, uint32_t initval, int flags)
-{
- struct filedesc *fdp;
- struct eventfd *efd;
- struct file *fp;
- int fflags, fd, error;
-
- fflags = 0;
- if ((flags & LINUX_O_CLOEXEC) != 0)
- fflags |= O_CLOEXEC;
-
- fdp = td->td_proc->p_fd;
- error = falloc(td, &fp, &fd, fflags);
- if (error != 0)
- return (error);
-
- efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO);
- efd->efd_flags = flags;
- efd->efd_count = initval;
- mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
-
- knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
-
- fflags = FREAD | FWRITE;
- if ((flags & LINUX_O_NONBLOCK) != 0)
- fflags |= FNONBLOCK;
-
- finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops);
- fdrop(fp, td);
-
- td->td_retval[0] = fd;
- return (error);
-}
-
#ifdef LINUX_LEGACY_SYSCALLS
int
linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
{
+ struct specialfd_eventfd ae;
- return (eventfd_create(td, args->initval, 0));
+ bzero(&ae, sizeof(ae));
+ ae.initval = args->initval;
+ return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}
#endif
int
linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
{
+ struct specialfd_eventfd ae;
+ int flags;
- if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0)
- return (EINVAL);
-
- return (eventfd_create(td, args->initval, args->flags));
-}
-
-static int
-eventfd_close(struct file *fp, struct thread *td)
-{
- struct eventfd *efd;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- seldrain(&efd->efd_sel);
- knlist_destroy(&efd->efd_sel.si_note);
-
- fp->f_ops = &badfileops;
- mtx_destroy(&efd->efd_lock);
- free(efd, M_EPOLL);
-
- return (0);
-}
-
-static int
-eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- struct eventfd *efd;
- eventfd_t count;
- int error;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- if (uio->uio_resid < sizeof(eventfd_t))
- return (EINVAL);
-
- error = 0;
- mtx_lock(&efd->efd_lock);
-retry:
- if (efd->efd_count == 0) {
- if ((fp->f_flag & FNONBLOCK) != 0) {
- mtx_unlock(&efd->efd_lock);
- return (EAGAIN);
- }
- error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
- if (error == 0)
- goto retry;
- }
- if (error == 0) {
- if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
- count = 1;
- --efd->efd_count;
- } else {
- count = efd->efd_count;
- efd->efd_count = 0;
- }
- KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
- selwakeup(&efd->efd_sel);
- wakeup(&efd->efd_count);
- mtx_unlock(&efd->efd_lock);
- error = uiomove(&count, sizeof(eventfd_t), uio);
- } else
- mtx_unlock(&efd->efd_lock);
-
- return (error);
-}
-
-static int
-eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- struct eventfd *efd;
- eventfd_t count;
- int error;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- if (uio->uio_resid < sizeof(eventfd_t))
- return (EINVAL);
-
- error = uiomove(&count, sizeof(eventfd_t), uio);
- if (error != 0)
- return (error);
- if (count == UINT64_MAX)
- return (EINVAL);
-
- mtx_lock(&efd->efd_lock);
-retry:
- if (UINT64_MAX - efd->efd_count <= count) {
- if ((fp->f_flag & FNONBLOCK) != 0) {
- mtx_unlock(&efd->efd_lock);
- /* Do not not return the number of bytes written */
- uio->uio_resid += sizeof(eventfd_t);
- return (EAGAIN);
- }
- error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
- PCATCH, "lefdwr", 0);
- if (error == 0)
- goto retry;
- }
- if (error == 0) {
- efd->efd_count += count;
- KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
- selwakeup(&efd->efd_sel);
- wakeup(&efd->efd_count);
- }
- mtx_unlock(&efd->efd_lock);
-
- return (error);
-}
-
-static int
-eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
- struct thread *td)
-{
- struct eventfd *efd;
- int revents = 0;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (POLLERR);
-
- mtx_lock(&efd->efd_lock);
- if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
- revents |= events & (POLLIN|POLLRDNORM);
- if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
- revents |= events & (POLLOUT|POLLWRNORM);
- if (revents == 0)
- selrecord(td, &efd->efd_sel);
- mtx_unlock(&efd->efd_lock);
-
- return (revents);
-}
-
-static int
-eventfd_kqfilter(struct file *fp, struct knote *kn)
-{
- struct eventfd *efd;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- mtx_lock(&efd->efd_lock);
- switch (kn->kn_filter) {
- case EVFILT_READ:
- kn->kn_fop = &eventfd_rfiltops;
- break;
- case EVFILT_WRITE:
- kn->kn_fop = &eventfd_wfiltops;
- break;
- default:
- mtx_unlock(&efd->efd_lock);
- return (EINVAL);
- }
-
- kn->kn_hook = efd;
- knlist_add(&efd->efd_sel.si_note, kn, 1);
- mtx_unlock(&efd->efd_lock);
-
- return (0);
-}
-
-static void
-filt_eventfddetach(struct knote *kn)
-{
- struct eventfd *efd = kn->kn_hook;
-
- mtx_lock(&efd->efd_lock);
- knlist_remove(&efd->efd_sel.si_note, kn, 1);
- mtx_unlock(&efd->efd_lock);
-}
-
-static int
-filt_eventfdread(struct knote *kn, long hint)
-{
- struct eventfd *efd = kn->kn_hook;
- int ret;
-
- mtx_assert(&efd->efd_lock, MA_OWNED);
- ret = (efd->efd_count > 0);
-
- return (ret);
-}
-
-static int
-filt_eventfdwrite(struct knote *kn, long hint)
-{
- struct eventfd *efd = kn->kn_hook;
- int ret;
-
- mtx_assert(&efd->efd_lock, MA_OWNED);
- ret = (UINT64_MAX - 1 > efd->efd_count);
-
- return (ret);
-}
-
-static int
-eventfd_ioctl(struct file *fp, u_long cmd, void *data,
- struct ucred *active_cred, struct thread *td)
-{
-
- if (fp->f_data == NULL || (fp->f_type != DTYPE_LINUXEFD &&
- fp->f_type != DTYPE_LINUXTFD))
+ if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
+ LINUX_EFD_SEMAPHORE)) != 0)
return (EINVAL);
-
- switch (cmd)
- {
- case FIONBIO:
- if ((*(int *)data))
- atomic_set_int(&fp->f_flag, FNONBLOCK);
- else
- atomic_clear_int(&fp->f_flag, FNONBLOCK);
- case FIOASYNC:
- return (0);
- default:
- return (ENXIO);
- }
-}
-
-static int
-eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
- struct thread *td)
-{
-
- return (ENXIO);
-}
-
-static int
-eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
-{
-
- kif->kf_type = KF_TYPE_UNKNOWN;
- return (0);
+ flags = 0;
+ if ((args->flags & LINUX_O_CLOEXEC) != 0)
+ flags |= EFD_CLOEXEC;
+ if ((args->flags & LINUX_O_NONBLOCK) != 0)
+ flags |= EFD_NONBLOCK;
+ if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
+ flags |= EFD_SEMAPHORE;
+
+ bzero(&ae, sizeof(ae));
+ ae.flags = flags;
+ ae.initval = args->initval;
+ return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}
int
@@ -1154,6 +851,23 @@
return (tfd->tfd_count > 0);
}
+static int
+timerfd_ioctl(struct file *fp, u_long cmd, void *data,
+ struct ucred *active_cred, struct thread *td)
+{
+ /*
+  * fo_ioctl handler for timerfd files.  Returns EINVAL if the file has
+  * no private data or is not a DTYPE_LINUXTFD file.  FIONBIO and
+  * FIOASYNC are accepted and return 0 without doing any work here;
+  * every other command returns ENOTTY.
+  * NOTE(review): presumably the generic ioctl path has already updated
+  * FNONBLOCK in f_flag before calling this handler -- confirm.
+  */
+ if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
+ return (EINVAL);
+
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ }
+
+ return (ENOTTY);
+}
+
static int
timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
struct thread *td)
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3926,6 +3926,7 @@
kern/subr_vmem.c standard
kern/subr_witness.c optional witness
kern/sys_capability.c standard
+kern/sys_eventfd.c standard
kern/sys_generic.c standard
kern/sys_getrandom.c standard
kern/sys_pipe.c standard
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -55,6 +55,11 @@
__mac_set_fd
__mac_set_proc
+##
+## Allow creating special file descriptors like eventfd(2).
+##
+__specialfd
+
##
## Allow sysctl(2) as we scope internal to the call; this is a global
## namespace, but there are several critical sysctls required for almost
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -4609,8 +4609,8 @@
return ("dev");
case DTYPE_PROCDESC:
return ("proc");
- case DTYPE_LINUXEFD:
- return ("levent");
+ case DTYPE_EVENTFD:
+ return ("eventfd");
case DTYPE_LINUXTFD:
return ("ltimer");
default:
diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/sys_eventfd.c
@@ -0,0 +1,349 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/event.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/selinfo.h>
+#include <sys/eventfd.h>
+
+#include <security/audit/audit.h>
+
+_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
+_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
+
+MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
+
+static fo_rdwr_t eventfd_read;
+static fo_rdwr_t eventfd_write;
+static fo_ioctl_t eventfd_ioctl;
+static fo_poll_t eventfd_poll;
+static fo_kqfilter_t eventfd_kqfilter;
+static fo_stat_t eventfd_stat;
+static fo_close_t eventfd_close;
+static fo_fill_kinfo_t eventfd_fill_kinfo;
+
+static struct fileops eventfdops = {
+ .fo_read = eventfd_read,
+ .fo_write = eventfd_write,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = eventfd_ioctl,
+ .fo_poll = eventfd_poll,
+ .fo_kqfilter = eventfd_kqfilter,
+ .fo_stat = eventfd_stat,
+ .fo_close = eventfd_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = eventfd_fill_kinfo,
+ .fo_flags = DFLAG_PASSABLE
+};
+
+static void filt_eventfddetach(struct knote *kn);
+static int filt_eventfdread(struct knote *kn, long hint);
+static int filt_eventfdwrite(struct knote *kn, long hint);
+
+static struct filterops eventfd_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdread
+};
+
+static struct filterops eventfd_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdwrite
+};
+/*
+ * Per-descriptor eventfd state; stored in the owning file's f_data.
+ */
+struct eventfd {
+ eventfd_t efd_count; /* current counter value */
+ uint32_t efd_flags; /* EFD_* flags passed at creation */
+ struct selinfo efd_sel; /* select/poll registration and knote list */
+ struct mtx efd_lock; /* held around efd_count accesses; also the knlist lock */
+};
+/*
+ * Initialise `fp` as a new eventfd.
+ *
+ * Records `flags` and `initval` as audit arguments, allocates a zeroed
+ * struct eventfd (M_WAITOK) holding the initial counter and the creation
+ * flags, and finit()s `fp` as a read/write DTYPE_EVENTFD file using
+ * eventfdops.  EFD_NONBLOCK becomes FNONBLOCK on the file.  EFD_CLOEXEC is
+ * not examined here; kern_specialfd() turns it into O_CLOEXEC for
+ * finstall().  `td` is not referenced.  Always returns 0.
+ */
+int
+eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
+ int flags)
+{
+ struct eventfd *efd;
+ int fflags;
+
+ AUDIT_ARG_FFLAGS(flags);
+ AUDIT_ARG_VALUE(initval);
+
+ efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
+ efd->efd_flags = flags;
+ efd->efd_count = initval;
+ mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
+ knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); /* knote list shares efd_lock */
+
+ fflags = FREAD | FWRITE;
+ if ((flags & EFD_NONBLOCK) != 0)
+ fflags |= FNONBLOCK;
+ finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
+
+ return (0);
+}
+/*
+ * fo_close handler: tear down the eventfd attached to `fp`.
+ *
+ * Calls seldrain() on the selinfo, destroys the knote list and the mutex,
+ * then frees the structure.  `td` is not referenced.  Always returns 0.
+ */
+static int
+eventfd_close(struct file *fp, struct thread *td)
+{
+ struct eventfd *efd;
+
+ efd = fp->f_data;
+ seldrain(&efd->efd_sel);
+ knlist_destroy(&efd->efd_sel.si_note);
+ mtx_destroy(&efd->efd_lock);
+ free(efd, M_EVENTFD);
+ return (0);
+}
+/*
+ * fo_read handler: report the counter to the caller as one eventfd_t.
+ *
+ * Returns EINVAL if the uio has room for fewer than sizeof(eventfd_t)
+ * bytes.  While the counter is zero, a file with FNONBLOCK set gets EAGAIN
+ * and any other file sleeps interruptibly (PCATCH) on the counter; a failed
+ * sleep returns the mtx_sleep() error.  With EFD_SEMAPHORE the counter is
+ * decremented by one and 1 is reported; otherwise the whole counter is
+ * reported and reset to zero.  Knotes, select/poll waiters and threads
+ * sleeping on the counter are notified before the value is copied out.
+ */
+static int
+eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ error = 0;
+ efd = fp->f_data;
+ mtx_lock(&efd->efd_lock);
+ while (error == 0 && efd->efd_count == 0) {
+ if ((fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ return (EAGAIN);
+ }
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
+ "efdrd", 0);
+ }
+ if (error == 0) {
+ MPASS(efd->efd_count > 0);
+ if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
+ count = 1;
+ --efd->efd_count;
+ } else {
+ count = efd->efd_count;
+ efd->efd_count = 0;
+ }
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count); /* wake threads sleeping on the counter in read/write */
+ mtx_unlock(&efd->efd_lock); /* lock is released before uiomove() */
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ } else
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+/*
+ * fo_write handler: add one caller-supplied eventfd_t to the counter.
+ *
+ * Returns EINVAL if fewer than sizeof(eventfd_t) bytes are offered or the
+ * value is UINT64_MAX, and the uiomove() error if the copy-in fails.  If
+ * the addition would take the counter past UINT64_MAX - 1, a file with
+ * FNONBLOCK set gets EAGAIN (uio_resid is restored first) and any other
+ * file sleeps interruptibly (PCATCH) on the counter and re-checks.  After a
+ * successful update knotes, select/poll waiters and threads sleeping on the
+ * counter are notified.
+ */
+static int
+eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ if (error != 0)
+ return (error);
+ if (count == UINT64_MAX)
+ return (EINVAL);
+
+ efd = fp->f_data;
+ mtx_lock(&efd->efd_lock);
+retry:
+ if (UINT64_MAX - efd->efd_count <= count) {
+ if ((fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ /* Do not return the number of bytes written. */
+ uio->uio_resid += sizeof(eventfd_t);
+ return (EAGAIN);
+ }
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
+ PCATCH, "efdwr", 0);
+ if (error == 0)
+ goto retry;
+ }
+ if (error == 0) {
+ MPASS(UINT64_MAX - efd->efd_count > count);
+ efd->efd_count += count;
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count); /* wake threads sleeping on the counter in read/write */
+ }
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+/*
+ * fo_poll handler.
+ *
+ * Returns the requested POLLIN/POLLRDNORM bits when the counter is non-zero
+ * and the requested POLLOUT/POLLWRNORM bits when the counter is below
+ * UINT64_MAX - 1.  If none of the requested events is ready, the thread is
+ * registered on efd_sel with selrecord().  The counter is examined under
+ * efd_lock.
+ */
+static int
+eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct eventfd *efd;
+ int revents;
+
+ efd = fp->f_data;
+ revents = 0;
+ mtx_lock(&efd->efd_lock);
+ if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
+ revents |= events & (POLLIN | POLLRDNORM);
+ if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
+ efd->efd_count)
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (revents == 0)
+ selrecord(td, &efd->efd_sel);
+ mtx_unlock(&efd->efd_lock);
+
+ return (revents);
+}
+/*
+ * fo_kqfilter handler: attach knote `kn` to this eventfd.
+ *
+ * EVFILT_READ selects eventfd_rfiltops and EVFILT_WRITE selects
+ * eventfd_wfiltops; any other filter returns EINVAL.  The eventfd pointer
+ * is stored in kn_hook for the filter callbacks, and the knote is added to
+ * the selinfo's knote list while efd_lock is held.  Returns 0 on success.
+ */
+static int
+eventfd_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct eventfd *efd = fp->f_data;
+
+ mtx_lock(&efd->efd_lock);
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ kn->kn_fop = &eventfd_rfiltops;
+ break;
+ case EVFILT_WRITE:
+ kn->kn_fop = &eventfd_wfiltops;
+ break;
+ default:
+ mtx_unlock(&efd->efd_lock);
+ return (EINVAL);
+ }
+
+ kn->kn_hook = efd;
+ knlist_add(&efd->efd_sel.si_note, kn, 1); /* presumably 1 == list lock already held; confirm */
+ mtx_unlock(&efd->efd_lock);
+
+ return (0);
+}
+/*
+ * f_detach callback used by both eventfd filterops: removes `kn` from the
+ * knote list of the eventfd stored in kn_hook, under efd_lock.
+ */
+static void
+filt_eventfddetach(struct knote *kn)
+{
+ struct eventfd *efd = kn->kn_hook;
+
+ mtx_lock(&efd->efd_lock);
+ knlist_remove(&efd->efd_sel.si_note, kn, 1);
+ mtx_unlock(&efd->efd_lock);
+}
+/*
+ * f_event callback for EVFILT_READ.
+ *
+ * Asserts that efd_lock is owned, stores the current counter in kn_data
+ * and returns non-zero when the counter is greater than zero.  `hint` is
+ * not used.
+ */
+static int
+filt_eventfdread(struct knote *kn, long hint)
+{
+ struct eventfd *efd = kn->kn_hook;
+ int ret;
+
+ mtx_assert(&efd->efd_lock, MA_OWNED);
+ kn->kn_data = (int64_t)efd->efd_count;
+ ret = efd->efd_count > 0;
+
+ return (ret);
+}
+/*
+ * f_event callback for EVFILT_WRITE.
+ *
+ * Asserts that efd_lock is owned, stores UINT64_MAX - 1 - counter in
+ * kn_data and returns non-zero while the counter is below UINT64_MAX - 1.
+ * `hint` is not used.
+ * NOTE(review): for counters below 2^63 - 1 the difference is larger than
+ * INT64_MAX, so the int64_t cast does not preserve it (typically it comes
+ * out negative) -- confirm that kn_data consumers tolerate this.
+ */
+static int
+filt_eventfdwrite(struct knote *kn, long hint)
+{
+ struct eventfd *efd = kn->kn_hook;
+ int ret;
+
+ mtx_assert(&efd->efd_lock, MA_OWNED);
+ kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
+ ret = UINT64_MAX - 1 > efd->efd_count;
+
+ return (ret);
+}
+/*
+ * fo_ioctl handler.
+ *
+ * FIONBIO and FIOASYNC are accepted and return 0 without doing any work
+ * here; every other command returns ENOTTY.
+ * NOTE(review): presumably the generic ioctl path has already updated
+ * FNONBLOCK in f_flag before calling this handler -- confirm.
+ */
+static int
+eventfd_ioctl(struct file *fp, u_long cmd, void *data,
+ struct ucred *active_cred, struct thread *td)
+{
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ }
+
+ return (ENOTTY);
+}
+/*
+ * fo_stat handler: zeroes *st and reports the file type as S_IFIFO; all
+ * other stat fields stay zero.  Always returns 0.
+ */
+static int
+eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
+ struct thread *td)
+{
+ bzero((void *)st, sizeof *st);
+ st->st_mode = S_IFIFO;
+ return (0);
+}
+/*
+ * fo_fill_kinfo handler: marks the kinfo_file entry as KF_TYPE_EVENTFD and
+ * copies the current counter and the creation flags into kf_un.kf_eventfd
+ * while holding efd_lock.  `fdp` is not used.  Always returns 0.
+ */
+static int
+eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+ struct eventfd *efd = fp->f_data;
+
+ kif->kf_type = KF_TYPE_EVENTFD;
+ mtx_lock(&efd->efd_lock);
+ kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
+ kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
+ mtx_unlock(&efd->efd_lock);
+ return (0);
+}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -55,6 +55,7 @@
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
+#include <sys/eventfd.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
@@ -63,6 +64,7 @@
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/sleepqueue.h>
+#include <sys/specialfd.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -859,6 +861,67 @@
return (error);
}
+int
+kern_specialfd(struct thread *td, int type, void *arg)
+{
+ struct file *fp;
+ struct specialfd_eventfd *ae;
+ int error, fd, fflags;
+ /*
+  * Create a special file descriptor of the given `type`.
+  *
+  * A file is allocated with falloc_noinstall(), set up by the
+  * type-specific constructor, and then installed with finstall().
+  * Only SPECIALFD_EVENTFD is recognised; any other type yields EINVAL.
+  * `arg` is dereferenced directly, so it must point to a kernel copy
+  * of the request structure matching `type`.  No flag validation is
+  * done here; sys___specialfd() and linux_eventfd2() check flags
+  * before calling.  Once allocation has succeeded, fdrop() is called
+  * on every path.  On success the new descriptor number is returned
+  * in td_retval[0].
+  */
+ fflags = 0;
+ error = falloc_noinstall(td, &fp);
+ if (error != 0)
+ return (error);
+
+ switch (type) {
+ case SPECIALFD_EVENTFD:
+ ae = arg;
+ if ((ae->flags & EFD_CLOEXEC) != 0)
+ fflags |= O_CLOEXEC; /* close-on-exec is applied by finstall() below */
+ error = eventfd_create_file(td, fp, ae->initval, ae->flags);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error == 0)
+ error = finstall(td, fp, &fd, fflags, NULL);
+ fdrop(fp, td);
+ if (error == 0)
+ td->td_retval[0] = fd;
+ return (error);
+}
+/*
+ * __specialfd(2) system call entry point.
+ *
+ * For SPECIALFD_EVENTFD the request length must equal
+ * sizeof(struct specialfd_eventfd); the request is copied in from user
+ * space, flag bits other than EFD_CLOEXEC, EFD_NONBLOCK and EFD_SEMAPHORE
+ * are rejected with EINVAL, and the descriptor is then created by
+ * kern_specialfd().  A wrong length or an unknown type returns EINVAL; a
+ * failed copyin() returns its error.
+ */
+int
+sys___specialfd(struct thread *td, struct __specialfd_args *args)
+{
+ struct specialfd_eventfd ae;
+ int error;
+
+ switch (args->type) {
+ case SPECIALFD_EVENTFD:
+ if (args->len != sizeof(struct specialfd_eventfd)) {
+ error = EINVAL;
+ break;
+ }
+ error = copyin(args->req, &ae, sizeof(ae));
+ if (error != 0)
+ break;
+ if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK |
+ EFD_SEMAPHORE)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ error = kern_specialfd(td, args->type, &ae);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
int
poll_no_poll(int events)
{
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3241,6 +3241,13 @@
_In_z_ const char *path
);
}
+577 AUE_SPECIALFD STD {
+ int __specialfd(
+ int type,
+ _In_reads_bytes_(len) const void *req,
+ size_t len
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/sys/eventfd.h b/sys/sys/eventfd.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/eventfd.h
@@ -0,0 +1,54 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Greg V
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_EVENTFD_H_
+#define _SYS_EVENTFD_H_
+
+#include <sys/types.h>
+
+typedef uint64_t eventfd_t;
+
+#define EFD_SEMAPHORE 0x00000001
+#define EFD_NONBLOCK 0x00000004
+#define EFD_CLOEXEC 0x00100000
+
+#ifdef _KERNEL
+
+int eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
+ int flags);
+
+#else
+
+__BEGIN_DECLS
+int eventfd(unsigned int initval, int flags);
+int eventfd_read(int fd, eventfd_t *value);
+int eventfd_write(int fd, eventfd_t value);
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_SYS_EVENTFD_H_ */
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -69,7 +69,7 @@
#define DTYPE_PTS 10 /* pseudo teletype master device */
#define DTYPE_DEV 11 /* Device specific fd type */
#define DTYPE_PROCDESC 12 /* process descriptor */
-#define DTYPE_LINUXEFD 13 /* emulation eventfd type */
+#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_LINUXTFD 14 /* emulation timerfd type */
#ifdef _KERNEL
diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/specialfd.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Greg V
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SPECIALFD_H_
+#define _SYS_SPECIALFD_H_
+
+enum specialfd_type {
+ SPECIALFD_EVENTFD = 1,
+};
+/*
+ * Request structure for SPECIALFD_EVENTFD, passed to __specialfd() and
+ * kern_specialfd().
+ */
+struct specialfd_eventfd {
+ unsigned int initval; /* initial counter value */
+ int flags; /* EFD_* flags */
+};
+
+#endif /* !_SYS_SPECIALFD_H_ */
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -304,6 +304,7 @@
int kern_statat(struct thread *td, int flag, int fd, const char *path,
enum uio_seg pathseg, struct stat *sbp,
void (*hook)(struct vnode *vp, struct stat *sbp));
+int kern_specialfd(struct thread *td, int type, void *arg);
int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
struct statfs *buf);
int kern_symlinkat(struct thread *td, const char *path1, int fd,
diff --git a/sys/sys/user.h b/sys/sys/user.h
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -264,6 +264,7 @@
#define KF_TYPE_PTS 10
#define KF_TYPE_PROCDESC 11
#define KF_TYPE_DEV 12
+#define KF_TYPE_EVENTFD 13
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -436,6 +437,10 @@
uint64_t kf_spareint64[32];
pid_t kf_pid;
} kf_proc;
+ struct {
+ uint64_t kf_eventfd_value;
+ uint32_t kf_eventfd_flags;
+ } kf_eventfd;
} kf_un;
};
uint16_t kf_status; /* Status flags. */

File Metadata

Mime Type
text/plain
Expires
Thu, May 1, 8:18 AM (5 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17877487
Default Alt Text
D26668.diff (28 KB)

Event Timeline