Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F95660157
D32505.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
24 KB
Referenced Files
None
Subscribers
None
D32505.diff
View Options
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -122,6 +122,7 @@
readpassphrase.c \
recvmmsg.c \
rewinddir.c \
+ rseq_abi.c \
scandir.c \
scandir_b.c \
scandir-compat11.c \
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -436,6 +436,7 @@
};
FBSD_1.7 {
+ __rseq_abi;
posix_spawn_file_actions_addchdir_np;
posix_spawn_file_actions_addclosefrom_np;
posix_spawn_file_actions_addfchdir_np;
@@ -573,4 +574,6 @@
__fillcontextx;
__fillcontextx2;
__getcontextx_size;
+
+ __rseq_abi_init;
};
diff --git a/lib/libc/gen/rseq_abi.c b/lib/libc/gen/rseq_abi.c
new file mode 100644
--- /dev/null
+++ b/lib/libc/gen/rseq_abi.c
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/auxv.h>
+#include <sys/rseq.h>
+#include <sched.h>
+#include "libc_private.h"
+
+/*
+ * Per-thread rseq area.  Its address is handed to the kernel by
+ * __rseq_abi_init() below.  Declared volatile and as a weak symbol.
+ */
+_Thread_local volatile struct rseq __rseq_abi __weak_symbol;
+
+/*
+ * Constructor that performs the rseq registration for the thread that
+ * runs the library constructors, by calling __rseq_abi_init().
+ */
+static void __main_rseq_abi_init(void) __attribute__((__constructor__,
+ __used__));
+static void
+__main_rseq_abi_init(void)
+{
+ __rseq_abi_init();
+}
+
+/*
+ * Register the calling thread's __rseq_abi area with the kernel.
+ *
+ * Registration is attempted only when the kernel advertises
+ * ELF_BSDF_RSEQ1 in AT_BSDFLAGS.  If the kernel does not advertise
+ * support, or the rseq() call fails, cpu_id is set to
+ * RSEQ_CPU_ID_REGISTRATION_FAILED so that consumers can distinguish an
+ * unregistered area from a registered one reporting CPU 0.
+ */
+void
+__rseq_abi_init(void)
+{
+	int bsdflags;
+
+	if (_elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)) != 0 ||
+	    (bsdflags & ELF_BSDF_RSEQ1) == 0) {
+		__rseq_abi.cpu_id = (uint32_t)RSEQ_CPU_ID_REGISTRATION_FAILED;
+		return;
+	}
+	__rseq_abi.cpu_id_start = sched_getcpu();
+	if (rseq(&__rseq_abi, sizeof(__rseq_abi), 0, 0/* XXXKIB */) != 0)
+		__rseq_abi.cpu_id = (uint32_t)RSEQ_CPU_ID_REGISTRATION_FAILED;
+}
diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h
--- a/lib/libc/include/libc_private.h
+++ b/lib/libc/include/libc_private.h
@@ -435,4 +435,6 @@
struct __nl_cat_d *__catopen_l(const char *name, int type,
struct _xlocale *locale);
+void __rseq_abi_init(void);
+
#endif /* _LIBC_PRIVATE_H_ */
diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -420,6 +420,7 @@
_Fork;
fspacectl;
membarrier;
+ rseq;
swapoff;
};
diff --git a/lib/libthr/thread/thr_create.c b/lib/libthr/thread/thr_create.c
--- a/lib/libthr/thread/thr_create.c
+++ b/lib/libthr/thread/thr_create.c
@@ -288,6 +288,8 @@
curthread->attr.stacksize_attr;
#endif
+ __rseq_abi_init();
+
/* Run the current thread's start routine with argument: */
_pthread_exit(curthread->start_routine(curthread->arg));
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3827,6 +3827,7 @@
kern/kern_rctl.c standard
kern/kern_resource.c standard
kern/kern_rmlock.c standard
+kern/kern_rseq.c standard
kern/kern_rwlock.c standard
kern/kern_sdt.c optional kdtrace_hooks
kern/kern_sema.c standard
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -219,6 +219,11 @@
CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
"enable sigfastblock for new processes");
+static int __elfN(rseq1) = 1;
+SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, rseq1,
+ CTLFLAG_RWTUN, &__elfN(rseq1), 0,
+ "enable rseq v1 ABI for new processes");
+
static bool __elfN(allow_wx) = true;
SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx,
CTLFLAG_RWTUN, &__elfN(allow_wx), 0,
@@ -1495,6 +1500,7 @@
oc = atomic_load_int(&vm_overcommit);
bsdflags |= (oc & (SWAP_RESERVE_FORCE_ON | SWAP_RESERVE_RLIMIT_ON)) !=
0 ? ELF_BSDF_VMNOOVERCOMMIT : 0;
+ bsdflags |= __elfN(rseq1) ? ELF_BSDF_RSEQ1 : 0;
AUXARGS_ENTRY(pos, AT_BSDFLAGS, bsdflags);
AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc);
AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -810,6 +810,7 @@
/* STOPs are no longer ignored, arrange for AST */
signotify(td);
}
+ td->td_rseq_abi = NULL;
if ((imgp->sysent->sv_setid_allowed != NULL &&
!(*imgp->sysent->sv_setid_allowed)(td, imgp)) ||
diff --git a/sys/kern/kern_membarrier.c b/sys/kern/kern_membarrier.c
--- a/sys/kern/kern_membarrier.c
+++ b/sys/kern/kern_membarrier.c
@@ -51,7 +51,21 @@
MEMBARRIER_CMD_PRIVATE_EXPEDITED | \
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED | \
MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE | \
- MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE | \
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ | \
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
+
+/*
+ * Action passed to do_membarrier_ipi() for
+ * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ.  Under the thread lock, schedule
+ * the TDA_RSEQ AST for the current thread and mark it with TDF_RSEQ_MB.
+ */
+static void
+membarrier_action_rseq(void *arg __unused)
+{
+	struct thread *curtd;
+
+	curtd = curthread;
+
+	thread_lock(curtd);
+	ast_sched_locked(curtd, TDA_RSEQ);
+	curtd->td_flags |= TDF_RSEQ_MB;
+	thread_unlock(curtd);
+}
static void
membarrier_action_seqcst(void *arg __unused)
@@ -224,6 +238,29 @@
}
break;
+ case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+ if ((td->td_proc->p_flag2 & P2_MEMBAR_PRIVE_RSEQ) == 0) {
+ error = EPERM;
+ break;
+ }
+ pmap_active_cpus(vmspace_pmap(p->p_vmspace), &cs);
+ if ((flags & MEMBARRIER_CMD_FLAG_CPU) != 0) {
+ if (!CPU_ISSET(cpu_id, &cs))
+ break;
+ CPU_ZERO(&cs);
+ CPU_SET(cpu_id, &cs);
+ }
+ do_membarrier_ipi(&cs, membarrier_action_rseq);
+ break;
+
+ case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
+ if ((p->p_flag2 & P2_MEMBAR_PRIVE_RSEQ) == 0) {
+ PROC_LOCK(p);
+ p->p_flag2 |= P2_MEMBAR_PRIVE_RSEQ;
+ PROC_UNLOCK(p);
+ }
+ break;
+
default:
error = EINVAL;
break;
diff --git a/sys/kern/kern_rseq.c b/sys/kern/kern_rseq.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/kern_rseq.c
@@ -0,0 +1,283 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sysproto.h>
+#include <sys/rseq.h>
+
+/*
+ * Drop the thread's rseq registration by clearing td_rseq_abi.
+ */
+static void
+rseq_inactivate(struct thread *td)
+{
+ td->td_rseq_abi = NULL;
+}
+
+/*
+ * Drop the thread's rseq registration and post a SIGSEGV trap signal to
+ * it.  addr becomes the reported fault address and si_code the signal
+ * code (callers pass one of the SEGV_RSEQ_* values).
+ */
+static void
+rseq_inactivate_sig(struct thread *td, void *addr, int si_code)
+{
+	ksiginfo_t info;
+
+	rseq_inactivate(td);
+
+	ksiginfo_init_trap(&info);
+	info.ksi_addr = addr;
+	info.ksi_code = si_code;
+	info.ksi_signo = SIGSEGV;
+	info.ksi_trapno = 0;
+	trapsignal(td, &info);
+}
+
+/*
+ * Verify that the 32-bit word immediately preceding rc->abort_ip in user
+ * memory equals the signature recorded at registration (td_rseq_sig).
+ *
+ * Note the return convention: returns true when the check FAILED (the
+ * word could not be read or did not match), in which case the rseq
+ * registration has already been dropped and SIGSEGV posted; returns
+ * false when the signature matches and the caller may proceed.
+ */
+static bool
+rseq_check_signature(struct thread *td, const struct rseq_cs *rc)
+{
+	void *usig_addr;
+	uint32_t usig;
+	int error;
+
+	/*
+	 * abort_ip is a 64-bit field on all ABIs; go through uintptr_t so
+	 * that the conversion to a pointer is well-formed on ILP32 as well.
+	 */
+	usig_addr = (void *)(uintptr_t)(rc->abort_ip - sizeof(usig));
+	error = copyin(usig_addr, &usig, sizeof(usig));
+	if (error != 0) {
+		rseq_inactivate_sig(td, usig_addr, SEGV_RSEQ_R);
+		return (true);
+	}
+	if (usig != td->td_rseq_sig) {
+		rseq_inactivate_sig(td, usig_addr, SEGV_RSEQ_SIG);
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * Report whether the pending rseq event may be ignored for a critical
+ * section carrying the given flags: no TDF_RSEQ_CLRCS/TDF_RSEQ_MB request
+ * is pending, restart on preemption is disabled, and either restart on
+ * migration is disabled too or the thread did not change CPU.
+ *
+ * The thread state is sampled inside a critical section which is always
+ * left before returning.
+ */
+static bool
+rseq_no_restart(struct thread *td, uint32_t flags)
+{
+	bool skip;
+
+	critical_enter();
+	skip = (td->td_flags & (TDF_RSEQ_CLRCS | TDF_RSEQ_MB)) == 0 &&
+	    (flags & RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) != 0 &&
+	    ((flags & RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) != 0 ||
+	    td->td_oncpu == td->td_lastcpu);
+	critical_exit();
+	return (skip);
+}
+
+/*
+ * TDA_RSEQ AST handler (registered by rseq_init()).
+ *
+ * Publishes the current CPU number into the user rseq area and, if the
+ * thread was interrupted inside an rseq critical section that must be
+ * restarted, redirects the user PC to the section's abort_ip and clears
+ * the user rseq_cs pointer.  Any failure to access user memory, an
+ * unsupported rseq_cs version, or a signature mismatch drops the
+ * registration and posts SIGSEGV via rseq_inactivate_sig().
+ */
+static void
+ast_rseq(struct thread *td, int tda __unused)
+{
+	struct rseq rs;
+	struct rseq_cs rc;
+	register_t pc;
+	int cpu, error;
+	bool clear_cs;
+
+	if (td->td_rseq_abi == NULL)
+		return;
+
+	/*
+	 * We cannot enter critical section here to keep td_oncpu
+	 * valid due to userspace access.  We do not even want to
+	 * sched_pin() for the same reason.
+	 *
+	 * It is fine to get a context switch after reading td_oncpu,
+	 * since this would cause new AST pending and we re-enter this
+	 * function to update rseq cpu number.
+	 *
+	 * Microoptimize 64bit architectures by doing single 64bit
+	 * write for cpu ids.  For instance, on SMAP-enabled amd64
+	 * this saves two serialization instructions STAC/CLAC.
+	 */
+	cpu = td->td_oncpu;
+#ifdef __LP64__
+	rs.cpu_id_start = cpu;
+	rs.cpu_id = cpu;
+	error = suword64((char *)td->td_rseq_abi + offsetof(struct rseq,
+	    cpu_id_start), *(uint64_t *)(char *)&rs.cpu_id_start);
+#else
+	error = suword((char *)td->td_rseq_abi + offsetof(struct rseq,
+	    cpu_id_start), cpu);
+	if (error == 0) {
+		error = suword((char *)td->td_rseq_abi +
+		    offsetof(struct rseq, cpu_id), cpu);
+	}
+#endif
+	if (error != 0) {
+		rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_W);
+		return;
+	}
+
+	error = copyin(td->td_rseq_abi, &rs, sizeof(rs));
+	if (error != 0) {
+		rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_R);
+		return;
+	}
+
+	/* No critical section descriptor installed, nothing to restart. */
+	if (rs.rseq_cs.ptr64 == 0)
+		return;
+	clear_cs = false;
+
+	/* First consult the flags stored in the rseq area itself. */
+	if (rseq_no_restart(td, rs.flags))
+		return;
+
+	error = copyin((void *)(uintptr_t)rs.rseq_cs.ptr64, &rc, sizeof(rc));
+	if (error != 0) {
+		rseq_inactivate_sig(td, (void *)(uintptr_t)rs.rseq_cs.ptr64,
+		    SEGV_RSEQ_R);
+		return;
+	}
+	/* Only version 0 of the descriptor is accepted. */
+	if (rc.version != 0) {
+		rseq_inactivate_sig(td, (void *)(uintptr_t)rs.rseq_cs.ptr64,
+		    SEGV_RSEQ_R);
+		return;
+	}
+
+	/* Then the flags of the critical section descriptor. */
+	if (rseq_no_restart(td, rc.flags))
+		return;
+
+	/* Consume pending clear/membarrier requests. */
+	if ((td->td_flags & (TDF_RSEQ_CLRCS | TDF_RSEQ_MB)) != 0) {
+		if ((td->td_flags & TDF_RSEQ_CLRCS) != 0)
+			clear_cs = true;
+		thread_lock(td);
+		td->td_flags &= ~(TDF_RSEQ_CLRCS | TDF_RSEQ_MB);
+		thread_unlock(td);
+	}
+
+	/*
+	 * If the user PC is inside [start_ip, start_ip + post_commit_offset),
+	 * validate the abort handler signature and restart at abort_ip.
+	 */
+	pc = TRAPF_PC(td->td_frame);
+	if (!clear_cs &&
+	    pc >= rc.start_ip && pc < rc.start_ip + rc.post_commit_offset) {
+		if (rseq_check_signature(td, &rc))
+			return;
+
+		TRAPF_PC(td->td_frame) = rc.abort_ip;
+		clear_cs = true;
+	}
+	if (clear_cs) {
+		if (suword64((char *)td->td_rseq_abi + offsetof(struct rseq,
+		    rseq_cs.ptr64), 0) != 0) {
+			rseq_inactivate_sig(td, (char *)td->td_rseq_abi +
+			    offsetof(struct rseq, rseq_cs.ptr64),
+			    SEGV_RSEQ_W);
+			return;
+		}
+	}
+}
+
+/*
+ * First half of rseq handling for signal delivery.
+ *
+ * Reads the user rseq area and the critical section descriptor it points
+ * to, and, unless restart on signal is disabled by either set of flags,
+ * caches the section bounds and abort address in the thread and sets
+ * TDP2_RSEQ_SIG for rseq_on_sig() to act on.  TDP2_RSEQ_SIG is always
+ * cleared on entry.  Failure to read user memory or a signature mismatch
+ * drops the registration and posts SIGSEGV.
+ */
+void
+rseq_before_sig(struct thread *td)
+{
+	struct rseq rs;
+	struct rseq_cs rc;
+	void *ucs;
+	int error;
+
+	td->td_pflags2 &= ~TDP2_RSEQ_SIG;
+	if (td->td_rseq_abi == NULL)
+		return;
+
+	error = copyin(td->td_rseq_abi, &rs, sizeof(rs));
+	if (error != 0) {
+		rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_R);
+		return;
+	}
+
+	if (rs.rseq_cs.ptr64 == 0 ||
+	    (rs.flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) != 0)
+		return;
+
+	/*
+	 * ptr64 is 64 bits wide on all ABIs; convert through uintptr_t so
+	 * the pointer cast is well-formed on ILP32 too.
+	 */
+	ucs = (void *)(uintptr_t)rs.rseq_cs.ptr64;
+	error = copyin(ucs, &rc, sizeof(rc));
+	if (error != 0) {
+		rseq_inactivate_sig(td, ucs, SEGV_RSEQ_R);
+		return;
+	}
+
+	if ((rc.flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) != 0)
+		return;
+
+	/* Returns true when the check failed and the signal was posted. */
+	if (rseq_check_signature(td, &rc))
+		return;
+
+	td->td_pflags2 |= TDP2_RSEQ_SIG;
+	td->td_rseq_start_ip = rc.start_ip;
+	td->td_rseq_end_ip = rc.start_ip + rc.post_commit_offset;
+	td->td_rseq_abort_ip = rc.abort_ip;
+}
+
+/*
+ * Second half of rseq handling for signal delivery.
+ *
+ * Acts only if rseq_before_sig() set TDP2_RSEQ_SIG, and consumes that
+ * flag.  When the user PC lies within the cached critical section
+ * [td_rseq_start_ip, td_rseq_end_ip), the PC is moved to td_rseq_abort_ip
+ * and the TDA_RSEQ AST is scheduled with TDF_RSEQ_CLRCS set.
+ */
+void
+rseq_on_sig(struct thread *td)
+{
+	register_t ip;
+
+	if ((td->td_pflags2 & TDP2_RSEQ_SIG) == 0)
+		return;
+	td->td_pflags2 &= ~TDP2_RSEQ_SIG;
+
+	ip = TRAPF_PC(td->td_frame);
+	if (ip < td->td_rseq_start_ip || ip >= td->td_rseq_end_ip)
+		return;
+
+	TRAPF_PC(td->td_frame) = td->td_rseq_abort_ip;
+	thread_lock(td);
+	ast_sched_locked(td, TDA_RSEQ);
+	td->td_flags |= TDF_RSEQ_CLRCS;
+	thread_unlock(td);
+}
+
+/*
+ * Register or unregister the calling thread's rseq area.
+ *
+ * rseq		user address of the struct rseq (must be 0 to unregister)
+ * rseqlen	must equal sizeof(struct rseq)
+ * flags	0 to register, RSEQ_FLAG_UNREGISTER to unregister
+ * sig		abort handler signature; on unregister it must match the
+ *		value given at registration
+ *
+ * Returns 0 on success, EINVAL for malformed arguments (including an
+ * area that crosses a page boundary), EPERM for a signature mismatch on
+ * unregister, and EBUSY when an area is already registered.
+ */
+static int
+kern_rseq(struct thread *td, uintptr_t rseq, uint32_t rseqlen, int flags,
+    uint32_t sig)
+{
+	if (rseqlen != sizeof(struct rseq))
+		return (EINVAL);
+
+	if (flags == RSEQ_FLAG_UNREGISTER) {
+		if (rseq != 0 || td->td_rseq_abi == NULL)
+			return (EINVAL);
+		if (sig != td->td_rseq_sig)
+			return (EPERM);
+		rseq_inactivate(td);
+		return (0);
+	}
+
+	if (td->td_rseq_abi != NULL)
+		return (EBUSY);
+	/*
+	 * The area must not cross a page boundary.  Compare the pages of
+	 * the first and the last byte; rseqlen is known to be non-zero
+	 * here.  Using the one-past-the-end address would wrongly reject
+	 * an area that ends exactly at the end of a page.
+	 */
+	if (flags != 0 || rseq == 0 ||
+	    trunc_page(rseq) != trunc_page(rseq + rseqlen - 1))
+		return (EINVAL);
+
+	td->td_rseq_abi = (void *)rseq;
+	td->td_rseq_sig = sig;
+	/* Have the AST handler publish the CPU number promptly. */
+	ast_sched(td, TDA_RSEQ);
+	return (0);
+}
+
+/*
+ * rseq(2) system call entry point: unpack the syscall arguments and
+ * forward them to kern_rseq(), returning its error code.
+ */
+int
+sys_rseq(struct thread *td, struct rseq_args *uap)
+{
+ return (kern_rseq(td, (uintptr_t)uap->rseq, uap->rseqlen,
+ uap->flags, uap->sig));
+}
+
+/*
+ * Boot-time initialization: register ast_rseq() as the handler for the
+ * TDA_RSEQ AST.  Run through the SYSINIT below.
+ */
+static void
+rseq_init(void *arg __unused)
+{
+ ast_register(TDA_RSEQ, ASTR_ASTF_REQUIRED, 0, ast_rseq);
+}
+SYSINIT(rseq, SI_SUB_P1003_1B, SI_ORDER_ANY, rseq_init, NULL);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -69,6 +69,7 @@
#include <sys/posix4.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
+#include <sys/rseq.h>
#include <sys/sdt.h>
#include <sys/sbuf.h>
#include <sys/sleepqueue.h>
@@ -2108,6 +2109,7 @@
KASSERT(_SIG_VALID(sig), ("invalid signal"));
sigfastblock_fetch(td);
+ rseq_before_sig(td);
PROC_LOCK(p);
ps = p->p_sigacts;
mtx_lock(&ps->ps_mtx);
@@ -2121,6 +2123,7 @@
ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
&td->td_sigmask, ksi->ksi_code);
#endif
+ rseq_on_sig(td);
(*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)],
ksi, &td->td_sigmask);
postsig_done(sig, td, ps);
@@ -3341,6 +3344,7 @@
if (p->p_sig == sig) {
p->p_sig = 0;
}
+ rseq_on_sig(td);
(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
postsig_done(sig, td, ps);
}
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -53,6 +53,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/rseq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
@@ -491,7 +492,7 @@
mi_switch(int flags)
{
uint64_t runtime, new_switchtime;
- struct thread *td;
+ struct thread *td, *td1;
td = curthread; /* XXX */
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
@@ -549,10 +550,12 @@
/*
* If the last thread was exiting, finish cleaning it up.
*/
- if ((td = PCPU_GET(deadthread))) {
+ if ((td1 = PCPU_GET(deadthread))) {
PCPU_SET(deadthread, NULL);
- thread_stash(td);
+ thread_stash(td1);
}
+ if (td->td_rseq_abi != NULL)
+ ast_sched(td, TDA_RSEQ);
spinlock_exit();
}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -89,9 +89,9 @@
"struct thread KBI td_flags");
_Static_assert(offsetof(struct thread, td_pflags) == 0x114,
"struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x4b0,
+_Static_assert(offsetof(struct thread, td_frame) == 0x4d0,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x6c0,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x6e0,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb8,
"struct proc KBI p_flag");
@@ -110,8 +110,8 @@
_Static_assert(offsetof(struct thread, td_pflags) == 0xa8,
"struct thread KBI td_pflags");
-_Static_assert(offsetof(struct thread, td_frame) == 0x30c,
+_Static_assert(offsetof(struct thread, td_frame) == 0x308,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x350,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x360,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x6c,
"struct proc KBI p_flag");
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -57,6 +57,7 @@
#include <sys/proc.h>
#include <sys/ktr.h>
#include <sys/resourcevar.h>
+#include <sys/rseq.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3312,6 +3312,14 @@
int cpu_id
);
}
+584 AUE_NULL STD|CAPENABLED {
+ int rseq(
+ _Inout_updates_bytes_(rseqlen) void *rseq,
+ uint32_t rseqlen,
+ int flags,
+ uint32_t sig
+ );
+ }
; vim: syntax=off
diff --git a/sys/sys/elf_common.h b/sys/sys/elf_common.h
--- a/sys/sys/elf_common.h
+++ b/sys/sys/elf_common.h
@@ -1504,5 +1504,6 @@
#define ELF_BSDF_SIGFASTBLK 0x0001 /* Kernel supports fast sigblock */
#define ELF_BSDF_VMNOOVERCOMMIT 0x0002
+#define ELF_BSDF_RSEQ1 0x0004 /* Kernel support for rseq v1 */
#endif /* !_SYS_ELF_COMMON_H_ */
diff --git a/sys/sys/membarrier.h b/sys/sys/membarrier.h
--- a/sys/sys/membarrier.h
+++ b/sys/sys/membarrier.h
@@ -47,12 +47,6 @@
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = 0x00000010,
MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = 0x00000020,
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = 0x00000040,
-
- /*
- * RSEQ constants are defined for source compatibility but are
- * not yes supported, MEMBARRIER_CMD_QUERY does not return
- * them in the mask.
- */
MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = 0x00000080,
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = 0x00000100,
};
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -324,6 +324,11 @@
size_t td_vslock_sz; /* (k) amount of vslock-ed space */
struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */
u_int td_ucredref; /* (k) references on td_realucred */
+ uint32_t td_rseq_sig; /* (k) abort handler signature */
+ void *td_rseq_abi; /* (k) usermode rseq */
+ register_t td_rseq_start_ip;/* (k) */
+ register_t td_rseq_end_ip; /* (k) */
+ register_t td_rseq_abort_ip;/* (k) */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
@@ -476,9 +481,9 @@
#define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */
#define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */
#define TDF_UNUSED7 0x10000000 /* Available */
-#define TDF_UNUSED8 0x20000000 /* Available */
-#define TDF_UNUSED9 0x40000000 /* Available */
-#define TDF_UNUSED10 0x80000000 /* Available */
+#define TDF_RSEQ 0x20000000 /* rseq active */
+#define TDF_RSEQ_MB 0x40000000 /* MEMBARRIER_RSEQ requested */
+#define TDF_RSEQ_CLRCS 0x80000000 /* rseq clear rc */
enum {
TDA_AST = 0, /* Special: call all non-flagged AST handlers */
@@ -496,6 +501,7 @@
TDA_MOD1, /* For third party use, before signals are */
TAD_MOD2, /* processed .. */
TDA_SIG,
+ TDA_RSEQ,
TDA_KTRACE,
TDA_SUSPEND,
TDA_SIGSUSPEND,
@@ -566,6 +572,7 @@
#define TDP2_SBPAGES 0x00000001 /* Owns sbusy on some pages */
#define TDP2_COMPAT32RB 0x00000002 /* compat32 ABI for robust lists */
#define TDP2_ACCT 0x00000004 /* Doing accounting */
+#define TDP2_RSEQ_SIG 0x00000008
/*
* Reasons that the current thread can not be run yet.
@@ -886,6 +893,8 @@
sync core registered */
#define P2_MEMBAR_GLOBE 0x00200000 /* membar global expedited
registered */
+/*
+ * Must be a bit of its own: P2_MEMBAR_GLOBE already uses 0x00200000.
+ * NOTE(review): 0x00400000 is assumed to be unused in p_flag2 — confirm.
+ */
+#define	P2_MEMBAR_PRIVE_RSEQ	0x00400000	/* membar private expedited
+						   rseq registered */
/* Flags protected by proctree_lock, kept in p_treeflags. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
diff --git a/sys/sys/rseq.h b/sys/sys/rseq.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/rseq.h
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __SYS_RSEQ_H__
+#define __SYS_RSEQ_H__
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/endian.h>
+
+/*
+ * Negative sentinel values for the rseq CPU id.
+ * NOTE(review): presumably stored in struct rseq cpu_id to signal that
+ * the area is not (or could not be) registered — confirm against the
+ * consumers.
+ */
+enum rseq_cpu_id_state {
+ RSEQ_CPU_ID_UNINITIALIZED = -1,
+ RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
+};
+
+/*
+ * Values for the flags argument of rseq(2).  Passing 0 registers an
+ * area; RSEQ_FLAG_UNREGISTER removes the current registration.
+ */
+enum rseq_flags {
+ RSEQ_FLAG_UNREGISTER = 1,
+};
+
+/*
+ * Flag bits examined by the kernel in both struct rseq flags and
+ * struct rseq_cs flags.  Each bit suppresses the restart of a critical
+ * section for the named event: preemption, signal delivery, or
+ * migration to another CPU.
+ */
+enum rseq_cs_flags {
+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = 0x00000001,
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = 0x00000002,
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = 0x00000004,
+};
+
+/*
+ * Descriptor of one restartable critical section, read from user memory
+ * by the kernel.
+ *
+ * version		must be 0, otherwise the kernel posts SIGSEGV
+ * flags		RSEQ_CS_FLAG_* bits
+ * start_ip		first address of the critical section
+ * post_commit_offset	length of the section; the section covers
+ *			[start_ip, start_ip + post_commit_offset)
+ * abort_ip		address execution is redirected to on restart; the
+ *			32-bit word just before it must hold the signature
+ *			passed to rseq(2)
+ */
+struct rseq_cs {
+ uint32_t version;
+ uint32_t flags;
+ uint64_t start_ip;
+ uint64_t post_commit_offset;
+ uint64_t abort_ip;
+};
+
+/*
+ * Per-thread area shared between userspace and the kernel.
+ *
+ * cpu_id_start, cpu_id	written by the kernel with the CPU the thread
+ *			runs on
+ * rseq_cs		user address of the active struct rseq_cs, or 0 if
+ *			none; always 64 bits wide, with ptr32/pad giving
+ *			32-bit ABIs access to the low half according to
+ *			byte order
+ * flags		RSEQ_CS_FLAG_* bits
+ */
+struct rseq {
+ uint32_t cpu_id_start;
+ uint32_t cpu_id;
+ union {
+ uint64_t ptr64;
+#ifdef __LP64__
+ uint64_t ptr;
+#else
+ struct {
+#if _BYTE_ORDER == _BIG_ENDIAN
+ uint32_t pad;
+ uint32_t ptr32;
+#else /* BYTE_ORDER */
+ uint32_t ptr32;
+ uint32_t pad;
+#endif /* BYTE_ORDER */
+ } ptr;
+#endif /* LP64 */
+ } rseq_cs;
+ uint32_t flags;
+};
+
+#ifdef _KERNEL
+
+/*
+ * NOTE(review): TD_RSEQ_ACTIVE is not referenced by any code visible in
+ * this change — confirm it is needed.
+ */
+#define TD_RSEQ_ACTIVE 0x00000001
+
+/*
+ * NOTE(review): no definition of rseq_ast() is visible in this change;
+ * the AST handler in kern_rseq.c is the static ast_rseq() — confirm this
+ * prototype is still wanted.
+ */
+void rseq_ast(struct thread *td);
+/* Signal delivery hooks, called from kern_sig.c. */
+void rseq_before_sig(struct thread *td);
+void rseq_on_sig(struct thread *td);
+
+#else /* _KERNEL */
+
+__BEGIN_DECLS
+extern __thread volatile struct rseq __rseq_abi __weak_symbol;
+
+int rseq(volatile struct rseq *rseq, uint32_t rseqlen, int flags, uint32_t sig);
+__END_DECLS
+
+#endif /* _KERNEL */
+
+#endif /* __SYS_RSEQ_H__ */
diff --git a/sys/sys/signal.h b/sys/sys/signal.h
--- a/sys/sys/signal.h
+++ b/sys/sys/signal.h
@@ -329,6 +329,9 @@
#define SEGV_ACCERR 2 /* Invalid permissions for mapped */
/* object. */
#define SEGV_PKUERR 100 /* x86: PKU violation */
+#define SEGV_RSEQ_R 101 /* rseq access read fault */
+#define SEGV_RSEQ_W 102 /* rseq access write fault */
+#define SEGV_RSEQ_SIG 103 /* rseq signature check fault */
/* codes for SIGFPE */
#define FPE_INTOVF 1 /* Integer overflow. */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Sep 23, 1:31 AM (5 h, 59 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12495677
Default Alt Text
D32505.diff (24 KB)
Attached To
Mode
D32505: Add rseq(2)
Attached
Detach File
Event Timeline
Log In to Comment