Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F110620879
D23587.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D23587.diff
View Options
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -999,6 +999,7 @@
regcnt--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
@@ -1041,6 +1042,7 @@
sa->code >= p->p_sysent->sv_size))
return (cpu_fetch_syscall_args_fallback(td, sa));
+ syscall_read_barrier();
sa->callp = &p->p_sysent->sv_table[sa->code];
sa->narg = sa->callp->sy_narg;
KASSERT(sa->narg <= nitems(sa->args), ("Too many syscall arguments!"));
Index: sys/arm/arm/syscall.c
===================================================================
--- sys/arm/arm/syscall.c
+++ sys/arm/arm/syscall.c
@@ -118,6 +118,7 @@
ap += 2;
}
p = td->td_proc;
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/arm64/arm64/trap.c
===================================================================
--- sys/arm64/arm64/trap.c
+++ sys/arm64/arm64/trap.c
@@ -136,6 +136,7 @@
nap--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/i386/i386/trap.c
===================================================================
--- sys/i386/i386/trap.c
+++ sys/i386/i386/trap.c
@@ -1085,6 +1085,7 @@
params += sizeof(quad_t);
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/kern/kern_syscalls.c
===================================================================
--- sys/kern/kern_syscalls.c
+++ sys/kern/kern_syscalls.c
@@ -41,6 +41,7 @@
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
+#include <sys/smp.h>
#include <machine/atomic.h>
/*
@@ -64,44 +65,94 @@
}
static void
-syscall_thread_drain(struct sysent *se)
+syscall_wait_thread(struct thread *td, int code)
{
- u_int32_t cnt, oldcnt;
-
- do {
- oldcnt = se->sy_thrcnt;
- KASSERT((oldcnt & SY_THR_STATIC) == 0,
- ("drain on static syscall"));
- cnt = oldcnt | SY_THR_DRAINING;
- } while (atomic_cmpset_acq_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
- while (atomic_cmpset_32(&se->sy_thrcnt, SY_THR_DRAINING,
- SY_THR_ABSENT) == 0)
+
+ while (atomic_load_int(&td->td_sa.code) == code)
pause("scdrn", hz/2);
}
-int
-_syscall_thread_enter(struct thread *td, struct sysent *se)
+static int
+syscall_wait(struct proc *p, int code, struct sx *lock)
{
- u_int32_t cnt, oldcnt;
-
- do {
- oldcnt = se->sy_thrcnt;
- if ((oldcnt & (SY_THR_DRAINING | SY_THR_ABSENT)) != 0)
- return (ENOSYS);
- cnt = oldcnt + SY_THR_INCR;
- } while (atomic_cmpset_acq_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
+ struct thread *td;
+
+ PROC_LOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (atomic_load_int(&td->td_sa.code) != code)
+ continue;
+ sx_sunlock(lock);
+ PROC_UNLOCK(p);
+ syscall_wait_thread(td, code);
+ return (1);
+ }
+ PROC_UNLOCK(p);
return (0);
}
-void
-_syscall_thread_exit(struct thread *td, struct sysent *se)
+/*
+ * Observe all threads not executing the passed syscall.
+ *
+ * We are called when the func pointer is replaced with a dummy.
+ *
+ * All code preparing syscall execution uses syscall_read_barrier after setting
+ * the syscall number and before reading sysent. cpus_fence_seq_cst below both
+ * publishes our update and synchronizes against aforementioned consumers. This
+ * guarantees there will be no new threads executing the old syscall code (but
+ * there may be new threads executing the newly installed ENOSYS handler).
+ *
+ * From then on we only have to observe all threads executing a different
+ * sysent (or no sysent in the first place).
+ *
+ * Note no effort is made to nudge forward a thread blocked on the syscall.
+ */
+static void
+syscall_drain(int code)
+{
+ struct proc *p;
+ int i, j;
+
+ cpus_fence_seq_cst();
+
+ for (i = 0; i < pidhashlock + 1; i++) {
+loop_unlocked:
+ sx_slock(&pidhashtbl_lock[i]);
+ for (j = i; j <= pidhash; j += pidhashlock + 1) {
+ LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if (syscall_wait(p, code, &pidhashtbl_lock[i]))
+ /*
+ * Note this is likely to revisit
+ * the same process, which is fine.
+ * This avoids inserting a marker to
+ * make sure we did not miss anything.
+ */
+ goto loop_unlocked;
+ }
+ }
+ sx_sunlock(&pidhashtbl_lock[i]);
+ }
+}
+
+static void
+syscall_sysent_replace(struct sysent *se, struct sysent *old,
+ const struct sysent *new)
{
- u_int32_t cnt, oldcnt;
- do {
- oldcnt = se->sy_thrcnt;
- cnt = oldcnt - SY_THR_INCR;
- } while (atomic_cmpset_rel_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
+ if (old != NULL)
+ *old = *se;
+
+ atomic_store_int(&se->sy_narg, new->sy_narg);
+ atomic_store_16(&se->sy_auevent, new->sy_auevent);
+ atomic_store_ptr((void *)&se->sy_systrace_args_func,
+ (uintptr_t)new->sy_systrace_args_func);
+ atomic_store_int(&se->sy_entry, new->sy_entry);
+ atomic_store_int(&se->sy_return, new->sy_return);
+ atomic_store_int(&se->sy_flags, new->sy_flags);
+ atomic_store_int(&se->sy_thrflags, new->sy_thrflags);
+ atomic_thread_fence_rel();
+ atomic_store_ptr((void *)&se->sy_call, (uintptr_t)new->sy_call);
}
int
@@ -129,12 +180,9 @@
return (EEXIST);
}
- KASSERT(sysents[*offset].sy_thrcnt == SY_THR_ABSENT,
+ KASSERT(sysents[*offset].sy_thrflags == SY_THR_ABSENT,
("dynamic syscall is not protected"));
- *old_sysent = sysents[*offset];
- new_sysent->sy_thrcnt = SY_THR_ABSENT;
- sysents[*offset] = *new_sysent;
- atomic_store_rel_32(&sysents[*offset].sy_thrcnt, flags);
+ syscall_sysent_replace(&sysents[*offset], old_sysent, new_sysent);
return (0);
}
@@ -148,10 +196,11 @@
return (0); /* XXX? */
se = &sysents[offset];
- if ((se->sy_thrcnt & SY_THR_STATIC) != 0)
+ if ((se->sy_thrflags & SY_THR_STATIC) != 0)
return (EINVAL);
- syscall_thread_drain(se);
- sysents[offset] = *old_sysent;
+ syscall_sysent_replace(se, NULL, old_sysent);
+ syscall_drain(offset);
+
return (0);
}
Index: sys/kern/kern_thr.c
===================================================================
--- sys/kern/kern_thr.c
+++ sys/kern/kern_thr.c
@@ -375,6 +375,7 @@
#ifdef AUDIT
AUDIT_SYSCALL_EXIT(0, td);
#endif
+ syscall_exit(td);
PROC_SLOCK(p);
thread_stopped(p);
Index: sys/kern/subr_syscall.c
===================================================================
--- sys/kern/subr_syscall.c
+++ sys/kern/subr_syscall.c
@@ -124,12 +124,6 @@
}
#endif
- error = syscall_thread_enter(td, sa->callp);
- if (error != 0) {
- td->td_errno = error;
- goto retval;
- }
-
/*
* Fetch fast sigblock value at the time of syscall
* entry because sleepqueue primitives might call
@@ -163,7 +157,6 @@
if (__predict_false((td->td_pflags & TDP_NERRNO) == 0))
td->td_errno = error;
}
- syscall_thread_exit(td, sa->callp);
retval:
KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code),
Index: sys/kern/subr_trap.c
===================================================================
--- sys/kern/subr_trap.c
+++ sys/kern/subr_trap.c
@@ -137,6 +137,8 @@
td_softdep_cleanup(td);
MPASS(td->td_su == NULL);
+ syscall_exit(td);
+
/*
* If this thread tickled GEOM, we need to wait for the giggling to
* stop before we return to userland
Index: sys/mips/mips/trap.c
===================================================================
--- sys/mips/mips/trap.c
+++ sys/mips/mips/trap.c
@@ -438,6 +438,7 @@
printf("SYSCALL #%d pid:%u\n", sa->code, td->td_proc->p_pid);
#endif
+ syscall_read_barrier();
se = td->td_proc->p_sysent;
/*
* XXX
Index: sys/powerpc/powerpc/trap.c
===================================================================
--- sys/powerpc/powerpc/trap.c
+++ sys/powerpc/powerpc/trap.c
@@ -652,6 +652,7 @@
}
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/riscv/riscv/trap.c
===================================================================
--- sys/riscv/riscv/trap.c
+++ sys/riscv/riscv/trap.c
@@ -112,6 +112,7 @@
nap--;
}
+ syscall_read_barrier();
if (sa->code >= p->p_sysent->sv_size)
sa->callp = &p->p_sysent->sv_table[0];
else
Index: sys/sys/sysent.h
===================================================================
--- sys/sys/sysent.h
+++ sys/sys/sysent.h
@@ -62,6 +62,9 @@
#endif
extern systrace_probe_func_t systrace_probe_func;
+/*
+ * Make sure to update syscall_sysent_replace when modifying this structure.
+ */
struct sysent { /* system call table */
int sy_narg; /* number of arguments */
sy_call_t *sy_call; /* implementing function */
@@ -71,7 +74,7 @@
u_int32_t sy_entry; /* DTrace entry ID for systrace. */
u_int32_t sy_return; /* DTrace return ID for systrace. */
u_int32_t sy_flags; /* General flags for system calls. */
- u_int32_t sy_thrcnt;
+ u_int32_t sy_thrflags;
};
/*
@@ -79,11 +82,9 @@
*/
#define SYF_CAPENABLED 0x00000001
-#define SY_THR_FLAGMASK 0x7
+#define SY_THR_FLAGMASK 0x3
#define SY_THR_STATIC 0x1
-#define SY_THR_DRAINING 0x2
-#define SY_THR_ABSENT 0x4
-#define SY_THR_INCR 0x8
+#define SY_THR_ABSENT 0x2
#ifdef KLD_MODULE
#define SY_THR_STATIC_KLD 0
@@ -194,7 +195,7 @@
.sy_entry = 0, \
.sy_return = 0, \
.sy_flags = 0, \
- .sy_thrcnt = 0 \
+ .sy_thrflags = 0 \
}
#define MAKE_SYSENT(syscallname) \
@@ -292,26 +293,11 @@
int lkmnosys(struct thread *, struct nosys_args *);
int lkmressys(struct thread *, struct nosys_args *);
-int _syscall_thread_enter(struct thread *td, struct sysent *se);
-void _syscall_thread_exit(struct thread *td, struct sysent *se);
-
-static inline int
-syscall_thread_enter(struct thread *td, struct sysent *se)
-{
-
- if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0))
- return (0);
- return (_syscall_thread_enter(td, se));
-}
-
-static inline void
-syscall_thread_exit(struct thread *td, struct sysent *se)
-{
-
- if (__predict_true((se->sy_thrcnt & SY_THR_STATIC) != 0))
- return;
- _syscall_thread_exit(td, se);
-}
+/*
+ * See syscall_drain().
+ */
+#define syscall_read_barrier() __compiler_membar()
+#define syscall_exit(td) do { (td)->td_sa.code = 0; } while (0)
int shared_page_alloc(int size, int align);
int shared_page_fill(int size, int align, const void *data);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Feb 22, 12:43 AM (2 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16763561
Default Alt Text
D23587.diff (10 KB)
Attached To
Mode
D23587: Stop counting threads executing dynamic syscalls
Attached
Detach File
Event Timeline
Log In to Comment