Page MenuHomeFreeBSD

D23587.diff
No OneTemporary

D23587.diff

Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -999,6 +999,7 @@
regcnt--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
@@ -1041,6 +1042,7 @@
sa->code >= p->p_sysent->sv_size))
return (cpu_fetch_syscall_args_fallback(td, sa));
+ syscall_read_barrier();
sa->callp = &p->p_sysent->sv_table[sa->code];
sa->narg = sa->callp->sy_narg;
KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!"));
Index: sys/arm/arm/syscall.c
===================================================================
--- sys/arm/arm/syscall.c
+++ sys/arm/arm/syscall.c
@@ -118,6 +118,7 @@
ap += 2;
}
p = td->td_proc;
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/arm64/arm64/trap.c
===================================================================
--- sys/arm64/arm64/trap.c
+++ sys/arm64/arm64/trap.c
@@ -136,6 +136,7 @@
nap--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/i386/i386/trap.c
===================================================================
--- sys/i386/i386/trap.c
+++ sys/i386/i386/trap.c
@@ -1085,6 +1085,7 @@
params += sizeof(quad_t);
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/kern/kern_syscalls.c
===================================================================
--- sys/kern/kern_syscalls.c
+++ sys/kern/kern_syscalls.c
@@ -41,6 +41,7 @@
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
+#include <sys/smp.h>
#include <machine/atomic.h>
/*
@@ -64,44 +65,94 @@
}
static void
-syscall_thread_drain(struct sysent *se)
+syscall_wait_thread(struct thread *td, int code)
{
- u_int32_t cnt, oldcnt;
-
- do {
- oldcnt = se->sy_thrcnt;
- KASSERT((oldcnt & SY_THR_STATIC) == 0,
- ("drain on static syscall"));
- cnt = oldcnt | SY_THR_DRAINING;
- } while (atomic_cmpset_acq_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
- while (atomic_cmpset_32(&se->sy_thrcnt, SY_THR_DRAINING,
- SY_THR_ABSENT) == 0)
+
+ while (atomic_load_int(&td->td_sa.code) == code)
pause("scdrn", hz/2);
}
-int
-_syscall_thread_enter(struct thread *td, struct sysent *se)
+static int
+syscall_wait(struct proc *p, int code, struct sx *lock)
{
- u_int32_t cnt, oldcnt;
-
- do {
- oldcnt = se->sy_thrcnt;
- if ((oldcnt & (SY_THR_DRAINING | SY_THR_ABSENT)) != 0)
- return (ENOSYS);
- cnt = oldcnt + SY_THR_INCR;
- } while (atomic_cmpset_acq_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
+ struct thread *td;
+
+ PROC_LOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (atomic_load_int(&td->td_sa.code) != code)
+ continue;
+ sx_sunlock(lock);
+ PROC_UNLOCK(p);
+ syscall_wait_thread(td, code);
+ return (1);
+ }
+ PROC_UNLOCK(p);
return (0);
}
-void
-_syscall_thread_exit(struct thread *td, struct sysent *se)
+/*
+ * Observe all threads not executing the passed syscall.
+ *
+ * We are called when the func pointer is replaced with a dummy.
+ *
+ * All code preparing syscall execution uses syscall_read_barrier after setting
+ * the syscall number and before reading sysent. cpus_fence_seq_cst below both
+ * publishes our update and synchronizes against aforementioned consumers. This
+ * guarantees there will be no new threads executing the old syscall code (but
+ * there may be new threads executing the newly installed ENOSYS handler).
+ *
+ * From then on we only have to observe all threads executing a different
+ * sysent (or no sysent in the first place).
+ *
+ * Note no effort is made to nudge forward a thread blocked on the syscall.
+ */
+static void
+syscall_drain(int code)
+{
+ struct proc *p;
+ int i, j;
+
+ cpus_fence_seq_cst();
+
+ for (i = 0; i < pidhashlock + 1; i++) {
+loop_unlocked:
+ sx_slock(&pidhashtbl_lock[i]);
+ for (j = i; j <= pidhash; j += pidhashlock + 1) {
+ LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if (syscall_wait(p, code, &pidhashtbl_lock[i]))
+ /*
+ * Note this is likely to revisit
+ * the same process, which is fine.
+ * This avoids inserting a marker to
+ * make sure we did not miss anything.
+ */
+ goto loop_unlocked;
+ }
+ }
+ sx_sunlock(&pidhashtbl_lock[i]);
+ }
+}
+
+static void
+syscall_sysent_replace(struct sysent *se, struct sysent *old,
+ const struct sysent *new)
{
- u_int32_t cnt, oldcnt;
- do {
- oldcnt = se->sy_thrcnt;
- cnt = oldcnt - SY_THR_INCR;
- } while (atomic_cmpset_rel_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
+ if (old != NULL)
+ *old = *se;
+
+ atomic_store_int(&se->sy_narg, new->sy_narg);
+ atomic_store_16(&se->sy_auevent, new->sy_auevent);
+ atomic_store_ptr((void *)&se->sy_systrace_args_func,
+ (uintptr_t)new->sy_systrace_args_func);
+ atomic_store_int(&se->sy_entry, new->sy_entry);
+ atomic_store_int(&se->sy_return, new->sy_return);
+ atomic_store_int(&se->sy_flags, new->sy_flags);
+ atomic_store_int(&se->sy_thrflags, new->sy_thrflags);
+ atomic_thread_fence_rel();
+ atomic_store_ptr((void *)&se->sy_call, (uintptr_t)new->sy_call);
}
int
@@ -129,12 +180,9 @@
return (EEXIST);
}
- KASSERT(sysents[*offset].sy_thrcnt == SY_THR_ABSENT,
+ KASSERT(sysents[*offset].sy_thrflags == SY_THR_ABSENT,
("dynamic syscall is not protected"));
- *old_sysent = sysents[*offset];
- new_sysent->sy_thrcnt = SY_THR_ABSENT;
- sysents[*offset] = *new_sysent;
- atomic_store_rel_32(&sysents[*offset].sy_thrcnt, flags);
+ syscall_sysent_replace(&sysents[*offset], old_sysent, new_sysent);
return (0);
}
@@ -148,10 +196,11 @@
return (0); /* XXX? */
se = &sysents[offset];
- if ((se->sy_thrcnt & SY_THR_STATIC) != 0)
+ if ((se->sy_thrflags & SY_THR_STATIC) != 0)
return (EINVAL);
- syscall_thread_drain(se);
- sysents[offset] = *old_sysent;
+ syscall_sysent_replace(se, NULL, old_sysent);
+ syscall_drain(offset);
+
return (0);
}
Index: sys/kern/kern_thr.c
===================================================================
--- sys/kern/kern_thr.c
+++ sys/kern/kern_thr.c
@@ -375,6 +375,7 @@
#ifdef AUDIT
AUDIT_SYSCALL_EXIT(0, td);
#endif
+ syscall_exit(td);
PROC_SLOCK(p);
thread_stopped(p);
Index: sys/kern/subr_syscall.c
===================================================================
--- sys/kern/subr_syscall.c
+++ sys/kern/subr_syscall.c
@@ -124,12 +124,6 @@
}
#endif
- error = syscall_thread_enter(td, sa->callp);
- if (error != 0) {
- td->td_errno = error;
- goto retval;
- }
-
/*
* Fetch fast sigblock value at the time of syscall
* entry because sleepqueue primitives might call
@@ -163,7 +157,6 @@
if (__predict_false((td->td_pflags & TDP_NERRNO) == 0))
td->td_errno = error;
}
- syscall_thread_exit(td, sa->callp);
retval:
KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code),
Index: sys/kern/subr_trap.c
===================================================================
--- sys/kern/subr_trap.c
+++ sys/kern/subr_trap.c
@@ -137,6 +137,8 @@
td_softdep_cleanup(td);
MPASS(td->td_su == NULL);
+ syscall_exit(td);
+
/*
* If this thread tickled GEOM, we need to wait for the giggling to
* stop before we return to userland
Index: sys/mips/mips/trap.c
===================================================================
--- sys/mips/mips/trap.c
+++ sys/mips/mips/trap.c
@@ -438,6 +438,7 @@
printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid);
#endif
+ syscall_read_barrier();
se = td->td_proc->p_sysent;
/*
* XXX
Index: sys/powerpc/powerpc/trap.c
===================================================================
--- sys/powerpc/powerpc/trap.c
+++ sys/powerpc/powerpc/trap.c
@@ -652,6 +652,7 @@
}
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/riscv/riscv/trap.c
===================================================================
--- sys/riscv/riscv/trap.c
+++ sys/riscv/riscv/trap.c
@@ -112,6 +112,7 @@
nap--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/sys/sysent.h
===================================================================
--- sys/sys/sysent.h
+++ sys/sys/sysent.h
@@ -62,6 +62,9 @@
#endif
extern systrace_probe_func_t systrace_probe_func;
+/*
+ * Make sure to update syscall_sysent_replace when modifying this structure.
+ */
struct sysent { /* system call table */
int sy_narg; /* number of arguments */
sy_call_t *sy_call; /* implementing function */
@@ -71,7 +74,7 @@
u_int32_t sy_entry; /* DTrace entry ID for systrace. */
u_int32_t sy_return; /* DTrace return ID for systrace. */
u_int32_t sy_flags; /* General flags for system calls. */
- u_int32_t sy_thrcnt;
+ u_int32_t sy_thrflags;
};
/*
@@ -79,11 +82,9 @@
*/
#define SYF_CAPENABLED 0x00000001
-#define SY_THR_FLAGMASK 0x7
+#define SY_THR_FLAGMASK 0x3
#define SY_THR_STATIC 0x1
-#define SY_THR_DRAINING 0x2
-#define SY_THR_ABSENT 0x4
-#define SY_THR_INCR 0x8
+#define SY_THR_ABSENT 0x2
#ifdef KLD_MODULE
#define SY_THR_STATIC_KLD 0
@@ -194,7 +195,7 @@
.sy_entry = 0, \
.sy_return = 0, \
.sy_flags = 0, \
- .sy_thrcnt = 0 \
+ .sy_thrflags = 0 \
}
#define MAKE_SYSENT(syscallname) \
@@ -292,26 +293,11 @@
int lkmnosys(struct thread *, struct nosys_args *);
int lkmressys(struct thread *, struct nosys_args *);
-int _syscall_thread_enter(struct thread *td, struct sysent *se);
-void _syscall_thread_exit(struct thread *td, struct sysent *se);
-
-static inline int
-syscall_thread_enter(struct thread *td, struct sysent *se)
-{
-
- if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0))
- return (0);
- return (_syscall_thread_enter(td, se));
-}
-
-static inline void
-syscall_thread_exit(struct thread *td, struct sysent *se)
-{
-
- if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0))
- return;
- _syscall_thread_exit(td, se);
-}
+/*
+ * See syscall_drain().
+ */
+#define syscall_read_barrier() __compiler_membar()
+#define syscall_exit(td) do { (td)->td_sa.code = 0; } while (0)
int shared_page_alloc(int size, int align);
int shared_page_fill(int size, int align, const void *data);

File Metadata

Mime Type
text/plain
Expires
Sat, Feb 22, 12:43 AM (2 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16763561
Default Alt Text
D23587.diff (10 KB)

Event Timeline