Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F115913180
D26668.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
28 KB
Referenced Files
None
Subscribers
None
D26668.diff
View Options
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -659,6 +659,7 @@
#define AUE_SHMRENAME 43263 /* FreeBSD-specific. */
#define AUE_REALPATHAT 43264 /* FreeBSD-specific. */
#define AUE_CLOSERANGE 43265 /* FreeBSD-specific. */
+#define AUE_SPECIALFD 43266 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -1168,5 +1168,7 @@
; 576 is initialised by the krpc code, if present.
576 AUE_NULL NOSTD|NOPROTO { int rpctls_syscall(int op, \
const char *path); }
+577 AUE_SPECIALFD NOPROTO { int __specialfd(int type, const void *req, \
+ size_t len); }
; vim: syntax=off
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -51,9 +51,11 @@
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
+#include <sys/specialfd.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/timespec.h>
+#include <sys/eventfd.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
@@ -124,53 +126,11 @@
int error;
};
-/* eventfd */
-typedef uint64_t eventfd_t;
-
-static fo_rdwr_t eventfd_read;
-static fo_rdwr_t eventfd_write;
-static fo_ioctl_t eventfd_ioctl;
-static fo_poll_t eventfd_poll;
-static fo_kqfilter_t eventfd_kqfilter;
-static fo_stat_t eventfd_stat;
-static fo_close_t eventfd_close;
-static fo_fill_kinfo_t eventfd_fill_kinfo;
-
-static struct fileops eventfdops = {
- .fo_read = eventfd_read,
- .fo_write = eventfd_write,
- .fo_truncate = invfo_truncate,
- .fo_ioctl = eventfd_ioctl,
- .fo_poll = eventfd_poll,
- .fo_kqfilter = eventfd_kqfilter,
- .fo_stat = eventfd_stat,
- .fo_close = eventfd_close,
- .fo_chmod = invfo_chmod,
- .fo_chown = invfo_chown,
- .fo_sendfile = invfo_sendfile,
- .fo_fill_kinfo = eventfd_fill_kinfo,
- .fo_flags = DFLAG_PASSABLE
-};
-
-static void filt_eventfddetach(struct knote *kn);
-static int filt_eventfdread(struct knote *kn, long hint);
-static int filt_eventfdwrite(struct knote *kn, long hint);
-
-static struct filterops eventfd_rfiltops = {
- .f_isfd = 1,
- .f_detach = filt_eventfddetach,
- .f_event = filt_eventfdread
-};
-static struct filterops eventfd_wfiltops = {
- .f_isfd = 1,
- .f_detach = filt_eventfddetach,
- .f_event = filt_eventfdwrite
-};
-
/* timerfd */
typedef uint64_t timerfd_t;
static fo_rdwr_t timerfd_read;
+static fo_ioctl_t timerfd_ioctl;
static fo_poll_t timerfd_poll;
static fo_kqfilter_t timerfd_kqfilter;
static fo_stat_t timerfd_stat;
@@ -181,7 +141,7 @@
.fo_read = timerfd_read,
.fo_write = invfo_rdwr,
.fo_truncate = invfo_truncate,
- .fo_ioctl = eventfd_ioctl,
+ .fo_ioctl = timerfd_ioctl,
.fo_poll = timerfd_poll,
.fo_kqfilter = timerfd_kqfilter,
.fo_stat = timerfd_stat,
@@ -202,13 +162,6 @@
.f_event = filt_timerfdread
};
-struct eventfd {
- eventfd_t efd_count;
- uint32_t efd_flags;
- struct selinfo efd_sel;
- struct mtx efd_lock;
-};
-
struct timerfd {
clockid_t tfd_clockid;
struct itimerspec tfd_time;
@@ -219,7 +172,6 @@
struct mtx tfd_lock;
};
-static int eventfd_create(struct thread *td, uint32_t initval, int flags);
static void linux_timerfd_expire(void *);
static void linux_timerfd_curval(struct timerfd *, struct itimerspec *);
@@ -691,294 +643,39 @@
return (error1 == 0 ? 0 : error2);
}
-static int
-eventfd_create(struct thread *td, uint32_t initval, int flags)
-{
- struct filedesc *fdp;
- struct eventfd *efd;
- struct file *fp;
- int fflags, fd, error;
-
- fflags = 0;
- if ((flags & LINUX_O_CLOEXEC) != 0)
- fflags |= O_CLOEXEC;
-
- fdp = td->td_proc->p_fd;
- error = falloc(td, &fp, &fd, fflags);
- if (error != 0)
- return (error);
-
- efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO);
- efd->efd_flags = flags;
- efd->efd_count = initval;
- mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
-
- knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
-
- fflags = FREAD | FWRITE;
- if ((flags & LINUX_O_NONBLOCK) != 0)
- fflags |= FNONBLOCK;
-
- finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops);
- fdrop(fp, td);
-
- td->td_retval[0] = fd;
- return (error);
-}
-
#ifdef LINUX_LEGACY_SYSCALLS
int
linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
{
+ struct specialfd_eventfd ae;
- return (eventfd_create(td, args->initval, 0));
+ bzero(&ae, sizeof(ae));
+ ae.initval = args->initval;
+ return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}
#endif
int
linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
{
+ struct specialfd_eventfd ae;
+ int flags;
- if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0)
- return (EINVAL);
-
- return (eventfd_create(td, args->initval, args->flags));
-}
-
-static int
-eventfd_close(struct file *fp, struct thread *td)
-{
- struct eventfd *efd;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- seldrain(&efd->efd_sel);
- knlist_destroy(&efd->efd_sel.si_note);
-
- fp->f_ops = &badfileops;
- mtx_destroy(&efd->efd_lock);
- free(efd, M_EPOLL);
-
- return (0);
-}
-
-static int
-eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- struct eventfd *efd;
- eventfd_t count;
- int error;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- if (uio->uio_resid < sizeof(eventfd_t))
- return (EINVAL);
-
- error = 0;
- mtx_lock(&efd->efd_lock);
-retry:
- if (efd->efd_count == 0) {
- if ((fp->f_flag & FNONBLOCK) != 0) {
- mtx_unlock(&efd->efd_lock);
- return (EAGAIN);
- }
- error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
- if (error == 0)
- goto retry;
- }
- if (error == 0) {
- if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
- count = 1;
- --efd->efd_count;
- } else {
- count = efd->efd_count;
- efd->efd_count = 0;
- }
- KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
- selwakeup(&efd->efd_sel);
- wakeup(&efd->efd_count);
- mtx_unlock(&efd->efd_lock);
- error = uiomove(&count, sizeof(eventfd_t), uio);
- } else
- mtx_unlock(&efd->efd_lock);
-
- return (error);
-}
-
-static int
-eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
- struct eventfd *efd;
- eventfd_t count;
- int error;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- if (uio->uio_resid < sizeof(eventfd_t))
- return (EINVAL);
-
- error = uiomove(&count, sizeof(eventfd_t), uio);
- if (error != 0)
- return (error);
- if (count == UINT64_MAX)
- return (EINVAL);
-
- mtx_lock(&efd->efd_lock);
-retry:
- if (UINT64_MAX - efd->efd_count <= count) {
- if ((fp->f_flag & FNONBLOCK) != 0) {
- mtx_unlock(&efd->efd_lock);
- /* Do not not return the number of bytes written */
- uio->uio_resid += sizeof(eventfd_t);
- return (EAGAIN);
- }
- error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
- PCATCH, "lefdwr", 0);
- if (error == 0)
- goto retry;
- }
- if (error == 0) {
- efd->efd_count += count;
- KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
- selwakeup(&efd->efd_sel);
- wakeup(&efd->efd_count);
- }
- mtx_unlock(&efd->efd_lock);
-
- return (error);
-}
-
-static int
-eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
- struct thread *td)
-{
- struct eventfd *efd;
- int revents = 0;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (POLLERR);
-
- mtx_lock(&efd->efd_lock);
- if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
- revents |= events & (POLLIN|POLLRDNORM);
- if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
- revents |= events & (POLLOUT|POLLWRNORM);
- if (revents == 0)
- selrecord(td, &efd->efd_sel);
- mtx_unlock(&efd->efd_lock);
-
- return (revents);
-}
-
-static int
-eventfd_kqfilter(struct file *fp, struct knote *kn)
-{
- struct eventfd *efd;
-
- efd = fp->f_data;
- if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
- return (EINVAL);
-
- mtx_lock(&efd->efd_lock);
- switch (kn->kn_filter) {
- case EVFILT_READ:
- kn->kn_fop = &eventfd_rfiltops;
- break;
- case EVFILT_WRITE:
- kn->kn_fop = &eventfd_wfiltops;
- break;
- default:
- mtx_unlock(&efd->efd_lock);
- return (EINVAL);
- }
-
- kn->kn_hook = efd;
- knlist_add(&efd->efd_sel.si_note, kn, 1);
- mtx_unlock(&efd->efd_lock);
-
- return (0);
-}
-
-static void
-filt_eventfddetach(struct knote *kn)
-{
- struct eventfd *efd = kn->kn_hook;
-
- mtx_lock(&efd->efd_lock);
- knlist_remove(&efd->efd_sel.si_note, kn, 1);
- mtx_unlock(&efd->efd_lock);
-}
-
-static int
-filt_eventfdread(struct knote *kn, long hint)
-{
- struct eventfd *efd = kn->kn_hook;
- int ret;
-
- mtx_assert(&efd->efd_lock, MA_OWNED);
- ret = (efd->efd_count > 0);
-
- return (ret);
-}
-
-static int
-filt_eventfdwrite(struct knote *kn, long hint)
-{
- struct eventfd *efd = kn->kn_hook;
- int ret;
-
- mtx_assert(&efd->efd_lock, MA_OWNED);
- ret = (UINT64_MAX - 1 > efd->efd_count);
-
- return (ret);
-}
-
-static int
-eventfd_ioctl(struct file *fp, u_long cmd, void *data,
- struct ucred *active_cred, struct thread *td)
-{
-
- if (fp->f_data == NULL || (fp->f_type != DTYPE_LINUXEFD &&
- fp->f_type != DTYPE_LINUXTFD))
+ if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
+ LINUX_EFD_SEMAPHORE)) != 0)
return (EINVAL);
-
- switch (cmd)
- {
- case FIONBIO:
- if ((*(int *)data))
- atomic_set_int(&fp->f_flag, FNONBLOCK);
- else
- atomic_clear_int(&fp->f_flag, FNONBLOCK);
- case FIOASYNC:
- return (0);
- default:
- return (ENXIO);
- }
-}
-
-static int
-eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
- struct thread *td)
-{
-
- return (ENXIO);
-}
-
-static int
-eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
-{
-
- kif->kf_type = KF_TYPE_UNKNOWN;
- return (0);
+ flags = 0;
+ if ((args->flags & LINUX_O_CLOEXEC) != 0)
+ flags |= EFD_CLOEXEC;
+ if ((args->flags & LINUX_O_NONBLOCK) != 0)
+ flags |= EFD_NONBLOCK;
+ if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
+ flags |= EFD_SEMAPHORE;
+
+ bzero(&ae, sizeof(ae));
+ ae.flags = flags;
+ ae.initval = args->initval;
+ return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}
int
@@ -1154,6 +851,23 @@
return (tfd->tfd_count > 0);
}
+static int
+timerfd_ioctl(struct file *fp, u_long cmd, void *data,
+ struct ucred *active_cred, struct thread *td)
+{
+
+ if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
+ return (EINVAL);
+
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ }
+
+ return (ENOTTY);
+}
+
static int
timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
struct thread *td)
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3926,6 +3926,7 @@
kern/subr_vmem.c standard
kern/subr_witness.c optional witness
kern/sys_capability.c standard
+kern/sys_eventfd.c standard
kern/sys_generic.c standard
kern/sys_getrandom.c standard
kern/sys_pipe.c standard
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -55,6 +55,11 @@
__mac_set_fd
__mac_set_proc
+##
+## Allow creating special file descriptors like eventfd(2).
+##
+__specialfd
+
##
## Allow sysctl(2) as we scope internal to the call; this is a global
## namespace, but there are several critical sysctls required for almost
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -4609,8 +4609,8 @@
return ("dev");
case DTYPE_PROCDESC:
return ("proc");
- case DTYPE_LINUXEFD:
- return ("levent");
+ case DTYPE_EVENTFD:
+ return ("eventfd");
case DTYPE_LINUXTFD:
return ("ltimer");
default:
diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/sys_eventfd.c
@@ -0,0 +1,349 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/event.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/selinfo.h>
+#include <sys/eventfd.h>
+
+#include <security/audit/audit.h>
+
+_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
+_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
+
+MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
+
+static fo_rdwr_t eventfd_read;
+static fo_rdwr_t eventfd_write;
+static fo_ioctl_t eventfd_ioctl;
+static fo_poll_t eventfd_poll;
+static fo_kqfilter_t eventfd_kqfilter;
+static fo_stat_t eventfd_stat;
+static fo_close_t eventfd_close;
+static fo_fill_kinfo_t eventfd_fill_kinfo;
+
+static struct fileops eventfdops = {
+ .fo_read = eventfd_read,
+ .fo_write = eventfd_write,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = eventfd_ioctl,
+ .fo_poll = eventfd_poll,
+ .fo_kqfilter = eventfd_kqfilter,
+ .fo_stat = eventfd_stat,
+ .fo_close = eventfd_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = eventfd_fill_kinfo,
+ .fo_flags = DFLAG_PASSABLE
+};
+
+static void filt_eventfddetach(struct knote *kn);
+static int filt_eventfdread(struct knote *kn, long hint);
+static int filt_eventfdwrite(struct knote *kn, long hint);
+
+static struct filterops eventfd_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdread
+};
+
+static struct filterops eventfd_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdwrite
+};
+
+struct eventfd {
+ eventfd_t efd_count;
+ uint32_t efd_flags;
+ struct selinfo efd_sel;
+ struct mtx efd_lock;
+};
+
+int
+eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
+ int flags)
+{
+ struct eventfd *efd;
+ int fflags;
+
+ AUDIT_ARG_FFLAGS(flags);
+ AUDIT_ARG_VALUE(initval);
+
+ efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
+ efd->efd_flags = flags;
+ efd->efd_count = initval;
+ mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
+ knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
+
+ fflags = FREAD | FWRITE;
+ if ((flags & EFD_NONBLOCK) != 0)
+ fflags |= FNONBLOCK;
+ finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
+
+ return (0);
+}
+
+static int
+eventfd_close(struct file *fp, struct thread *td)
+{
+ struct eventfd *efd;
+
+ efd = fp->f_data;
+ seldrain(&efd->efd_sel);
+ knlist_destroy(&efd->efd_sel.si_note);
+ mtx_destroy(&efd->efd_lock);
+ free(efd, M_EVENTFD);
+ return (0);
+}
+
+static int
+eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ error = 0;
+ efd = fp->f_data;
+ mtx_lock(&efd->efd_lock);
+ while (error == 0 && efd->efd_count == 0) {
+ if ((fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ return (EAGAIN);
+ }
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
+ "efdrd", 0);
+ }
+ if (error == 0) {
+ MPASS(efd->efd_count > 0);
+ if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
+ count = 1;
+ --efd->efd_count;
+ } else {
+ count = efd->efd_count;
+ efd->efd_count = 0;
+ }
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count);
+ mtx_unlock(&efd->efd_lock);
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ } else
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+
+static int
+eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ if (error != 0)
+ return (error);
+ if (count == UINT64_MAX)
+ return (EINVAL);
+
+ efd = fp->f_data;
+ mtx_lock(&efd->efd_lock);
+retry:
+ if (UINT64_MAX - efd->efd_count <= count) {
+ if ((fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ /* Do not return the number of bytes written */
+ uio->uio_resid += sizeof(eventfd_t);
+ return (EAGAIN);
+ }
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
+ PCATCH, "efdwr", 0);
+ if (error == 0)
+ goto retry;
+ }
+ if (error == 0) {
+ MPASS(UINT64_MAX - efd->efd_count > count);
+ efd->efd_count += count;
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count);
+ }
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+
+static int
+eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct eventfd *efd;
+ int revents;
+
+ efd = fp->f_data;
+ revents = 0;
+ mtx_lock(&efd->efd_lock);
+ if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
+ revents |= events & (POLLIN | POLLRDNORM);
+ if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
+ efd->efd_count)
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (revents == 0)
+ selrecord(td, &efd->efd_sel);
+ mtx_unlock(&efd->efd_lock);
+
+ return (revents);
+}
+
+static int
+eventfd_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct eventfd *efd = fp->f_data;
+
+ mtx_lock(&efd->efd_lock);
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ kn->kn_fop = &eventfd_rfiltops;
+ break;
+ case EVFILT_WRITE:
+ kn->kn_fop = &eventfd_wfiltops;
+ break;
+ default:
+ mtx_unlock(&efd->efd_lock);
+ return (EINVAL);
+ }
+
+ kn->kn_hook = efd;
+ knlist_add(&efd->efd_sel.si_note, kn, 1);
+ mtx_unlock(&efd->efd_lock);
+
+ return (0);
+}
+
+static void
+filt_eventfddetach(struct knote *kn)
+{
+ struct eventfd *efd = kn->kn_hook;
+
+ mtx_lock(&efd->efd_lock);
+ knlist_remove(&efd->efd_sel.si_note, kn, 1);
+ mtx_unlock(&efd->efd_lock);
+}
+
+static int
+filt_eventfdread(struct knote *kn, long hint)
+{
+ struct eventfd *efd = kn->kn_hook;
+ int ret;
+
+ mtx_assert(&efd->efd_lock, MA_OWNED);
+ kn->kn_data = (int64_t)efd->efd_count;
+ ret = efd->efd_count > 0;
+
+ return (ret);
+}
+
+static int
+filt_eventfdwrite(struct knote *kn, long hint)
+{
+ struct eventfd *efd = kn->kn_hook;
+ int ret;
+
+ mtx_assert(&efd->efd_lock, MA_OWNED);
+ kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
+ ret = UINT64_MAX - 1 > efd->efd_count;
+
+ return (ret);
+}
+
+static int
+eventfd_ioctl(struct file *fp, u_long cmd, void *data,
+ struct ucred *active_cred, struct thread *td)
+{
+ switch (cmd) {
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ }
+
+ return (ENOTTY);
+}
+
+static int
+eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
+ struct thread *td)
+{
+ bzero((void *)st, sizeof *st);
+ st->st_mode = S_IFIFO;
+ return (0);
+}
+
+static int
+eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+ struct eventfd *efd = fp->f_data;
+
+ kif->kf_type = KF_TYPE_EVENTFD;
+ mtx_lock(&efd->efd_lock);
+ kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
+ kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
+ mtx_unlock(&efd->efd_lock);
+ return (0);
+}
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -55,6 +55,7 @@
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
+#include <sys/eventfd.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
@@ -63,6 +64,7 @@
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/sleepqueue.h>
+#include <sys/specialfd.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -859,6 +861,67 @@
return (error);
}
+int
+kern_specialfd(struct thread *td, int type, void *arg)
+{
+ struct file *fp;
+ struct specialfd_eventfd *ae;
+ int error, fd, fflags;
+
+ fflags = 0;
+ error = falloc_noinstall(td, &fp);
+ if (error != 0)
+ return (error);
+
+ switch (type) {
+ case SPECIALFD_EVENTFD:
+ ae = arg;
+ if ((ae->flags & EFD_CLOEXEC) != 0)
+ fflags |= O_CLOEXEC;
+ error = eventfd_create_file(td, fp, ae->initval, ae->flags);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (error == 0)
+ error = finstall(td, fp, &fd, fflags, NULL);
+ fdrop(fp, td);
+ if (error == 0)
+ td->td_retval[0] = fd;
+ return (error);
+}
+
+int
+sys___specialfd(struct thread *td, struct __specialfd_args *args)
+{
+ struct specialfd_eventfd ae;
+ int error;
+
+ switch (args->type) {
+ case SPECIALFD_EVENTFD:
+ if (args->len != sizeof(struct specialfd_eventfd)) {
+ error = EINVAL;
+ break;
+ }
+ error = copyin(args->req, &ae, sizeof(ae));
+ if (error != 0)
+ break;
+ if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK |
+ EFD_SEMAPHORE)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ error = kern_specialfd(td, args->type, &ae);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
int
poll_no_poll(int events)
{
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3241,6 +3241,13 @@
_In_z_ const char *path
);
}
+577 AUE_SPECIALFD STD {
+ int __specialfd(
+ int type,
+ _In_reads_bytes_(len) const void *req,
+ size_t len
+ );
+ }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/sys/eventfd.h b/sys/sys/eventfd.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/eventfd.h
@@ -0,0 +1,54 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Greg V
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_EVENTFD_H_
+#define _SYS_EVENTFD_H_
+
+#include <sys/types.h>
+
+typedef uint64_t eventfd_t;
+
+#define EFD_SEMAPHORE 0x00000001
+#define EFD_NONBLOCK 0x00000004
+#define EFD_CLOEXEC 0x00100000
+
+#ifdef _KERNEL
+
+int eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
+ int flags);
+
+#else
+
+__BEGIN_DECLS
+int eventfd(unsigned int initval, int flags);
+int eventfd_read(int fd, eventfd_t *value);
+int eventfd_write(int fd, eventfd_t value);
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_SYS_EVENTFD_H_ */
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -69,7 +69,7 @@
#define DTYPE_PTS 10 /* pseudo teletype master device */
#define DTYPE_DEV 11 /* Device specific fd type */
#define DTYPE_PROCDESC 12 /* process descriptor */
-#define DTYPE_LINUXEFD 13 /* emulation eventfd type */
+#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_LINUXTFD 14 /* emulation timerfd type */
#ifdef _KERNEL
diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/specialfd.h
@@ -0,0 +1,42 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Greg V
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SPECIALFD_H_
+#define _SYS_SPECIALFD_H_
+
+enum specialfd_type {
+ SPECIALFD_EVENTFD = 1,
+};
+
+struct specialfd_eventfd {
+ unsigned int initval;
+ int flags;
+};
+
+#endif /* !_SYS_SPECIALFD_H_ */
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -304,6 +304,7 @@
int kern_statat(struct thread *td, int flag, int fd, const char *path,
enum uio_seg pathseg, struct stat *sbp,
void (*hook)(struct vnode *vp, struct stat *sbp));
+int kern_specialfd(struct thread *td, int type, void *arg);
int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
struct statfs *buf);
int kern_symlinkat(struct thread *td, const char *path1, int fd,
diff --git a/sys/sys/user.h b/sys/sys/user.h
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -264,6 +264,7 @@
#define KF_TYPE_PTS 10
#define KF_TYPE_PROCDESC 11
#define KF_TYPE_DEV 12
+#define KF_TYPE_EVENTFD 13
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -436,6 +437,10 @@
uint64_t kf_spareint64[32];
pid_t kf_pid;
} kf_proc;
+ struct {
+ uint64_t kf_eventfd_value;
+ uint32_t kf_eventfd_flags;
+ } kf_eventfd;
} kf_un;
};
uint16_t kf_status; /* Status flags. */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 1, 8:18 AM (5 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17877487
Default Alt Text
D26668.diff (28 KB)
Attached To
Mode
D26668: Expose eventfd in the native API/ABI using a new __specialfd syscall
Attached
Detach File
Event Timeline
Log In to Comment