Page MenuHomeFreeBSD

D35827.diff
No OneTemporary

D35827.diff

diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c
--- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_provider.c
@@ -701,6 +701,30 @@
prp = dt_probe_discover(pvp, pdp);
}
+	if (strcmp(pvp->pv_desc.dtvd_name, "kinst") == 0) {
+		dtrace_kinst_probedesc_t pd;
+		int dev, rc;
+
+		/*
+		 * Ask the kinst kernel module, through its control device, to
+		 * create the probe(s) for the requested function and offset.
+		 */
+		if ((dev = open("/dev/dtrace/kinst", O_WRONLY)) < 0)
+			return (NULL);
+		strlcpy(pd.func, pdp->dtpd_func, sizeof(pd.func));
+		/*
+		 * TODO: what do we do in case func is a wildcard?
+		 * TODO: allow range syntax (x-y)
+		 */
+		/*
+		 * Signify wildcards with off = -1 and create probes for all
+		 * instructions at once instead of calling the ioctl for every
+		 * single instruction.
+		 */
+		if (n_is_glob)
+			pd.off = -1;
+		else
+			pd.off = strtol(pdp->dtpd_name, NULL, 10);
+		rc = ioctl(dev, DTRACEIOC_KINST_MKPROBE, &pd);
+		/*
+		 * The descriptor is only needed for this one request; close it
+		 * on both the success and the failure path so that it is not
+		 * leaked on every kinst probe lookup.
+		 */
+		(void) close(dev);
+		if (rc != 0)
+			return (NULL);
+	}
+
/*
* If no probe was found in our cache, convert the caller's partial
* probe description into a fully-formed matching probe description by
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
@@ -1338,6 +1338,15 @@
/* get DOF */
#define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t)
/* replicate enab */
+/*
+ * Argument of DTRACEIOC_KINST_MKPROBE: names the kernel function ("func")
+ * and the byte offset of the instruction inside that function ("off") for
+ * which kinst should create a probe.  libdtrace passes off = -1 to request
+ * probes for every instruction of the function at once.
+ */
+typedef struct {
+ char func[DTRACE_FUNCNAMELEN];
+ int off;
+} dtrace_kinst_probedesc_t;
+#define DTRACEIOC_KINST_MKPROBE _IOW('x',19,dtrace_kinst_probedesc_t)
+ /*
+ * request probe
+ * creation for kinst
+ */
#endif
/*
diff --git a/sys/cddl/dev/kinst/extern.h b/sys/cddl/dev/kinst/extern.h
new file mode 100644
--- /dev/null
+++ b/sys/cddl/dev/kinst/extern.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: CDDL 1.0
+ */
+#ifndef _EXTERN_H_
+#define _EXTERN_H_
+
+/*
+ * Logging helpers.  KINST_LOG(fmt, ...) prints the message prefixed with the
+ * calling function name and line number and terminated by a newline.
+ * KINST_LOG appends an empty string argument, which is consumed by the
+ * trailing "%s" in KINST_LOG_HELPER, so that __VA_ARGS__ is never empty in
+ * the helper even when the caller passes only a format string.
+ */
+#define KINST_LOG_HELPER(fmt, ...) \
+ printf("%s:%d: " fmt "%s\n", __func__, __LINE__, __VA_ARGS__)
+#define KINST_LOG(...) \
+ KINST_LOG_HELPER(__VA_ARGS__, "")
+
+/*
+ * Byte written over the first byte of a traced instruction while its probe
+ * is enabled; also used to fill unused trampoline memory.
+ * NOTE(review): 0xcc is presumably the one-byte breakpoint (int3) opcode and
+ * 0xf0 the value used on i386 -- confirm against the invop handling code.
+ */
+#ifdef __amd64__
+#define KINST_PATCHVAL 0xcc
+#else
+#define KINST_PATCHVAL 0xf0
+#endif /* __amd64__ */
+
+/* Type of a patch value and of the memory location that gets patched. */
+typedef uint8_t kinst_patchval_t;
+
+/* Only declare the malloc type when <sys/malloc.h> has been included. */
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_KINST);
+#endif /* MALLOC_DECLARE */
+
+#endif /* _EXTERN_H_ */
diff --git a/sys/cddl/dev/kinst/kinst.h b/sys/cddl/dev/kinst/kinst.h
new file mode 100644
--- /dev/null
+++ b/sys/cddl/dev/kinst/kinst.h
@@ -0,0 +1,30 @@
+/*
+ * SPDX-License-Identifier: CDDL 1.0
+ */
+#ifndef _KINST_H_
+#define _KINST_H_
+
+#include <sys/queue.h>
+#include "extern.h"
+
+/* Upper bound on the probes created by one kinst_make_probe() invocation. */
+#define KINST_PROBE_MAX 0x8000 /* 32k */
+
+struct linker_file;
+struct linker_symval;
+
+/*
+ * State of one kinst probe, i.e. of one instrumented instruction.
+ *
+ * kp_next       linkage in the global probe list
+ * kp_name       probe name: the instruction's offset within its function,
+ *               formatted as a decimal string
+ * kp_id         probe id returned by dtrace_probe_create()
+ * kp_trampoline trampoline holding a copy of the traced instruction followed
+ *               by a jump back to the instruction after it
+ * kp_patchpoint address of the traced instruction's first byte
+ * kp_patchval   byte stored at the patch point when the probe is enabled
+ * kp_savedval   original byte, restored when the probe is disabled
+ */
+struct kinst_probe {
+ TAILQ_ENTRY(kinst_probe) kp_next;
+ char kp_name[16];
+ dtrace_id_t kp_id;
+ uint8_t *kp_trampoline;
+ kinst_patchval_t *kp_patchpoint;
+ kinst_patchval_t kp_patchval;
+ kinst_patchval_t kp_savedval;
+};
+
+/* Trap hook: fires the probe whose patch point matches the trapping address. */
+int kinst_invop(uintptr_t, struct trapframe *, uintptr_t);
+/* Writes the given byte to the probe's patch point. */
+void kinst_patch_tracepoint(struct kinst_probe *, kinst_patchval_t);
+/* Per-symbol callback that creates the probes requested via the ioctl. */
+int kinst_make_probe(struct linker_file *, int, struct linker_symval *,
+ void *);
+
+#endif /* _KINST_H_ */
diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c
new file mode 100644
--- /dev/null
+++ b/sys/cddl/dev/kinst/kinst.c
@@ -0,0 +1,481 @@
+/*
+ * SPDX-License-Identifier: CDDL 1.0
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/linker.h>
+#include <sys/module.h>
+
+#include <sys/dtrace.h>
+#include <cddl/dev/dtrace/dtrace_cddl.h>
+#include <dis_tables.h>
+
+#include <machine/cpufunc.h>
+#include <machine/md_var.h>
+#include <machine/stdarg.h>
+
+#include "kinst.h"
+#include "trampoline.h"
+
+/* First byte a function must start with for kinst to instrument it. */
+#define KINST_PUSHL_EBP 0x55
+
+/*
+ * Opcodes of CALL and JMP with a 32-bit relative displacement, and the
+ * length of such a JMP (one opcode byte plus four displacement bytes).
+ */
+#define KINST_CALL 0xe8
+#define KINST_JMP 0xe9
+#define KINST_JMP_LEN 5
+
+/*
+ * Conditional near jumps: a prefix byte followed by an opcode in the range
+ * [FIRST, LAST] and a 32-bit displacement.
+ */
+#define KINST_NEARJMP_PREFIX 0x0f
+#define KINST_NEARJMP_FIRST 0x80
+#define KINST_NEARJMP_LAST 0x8f
+#define KINST_NEARJMP_LEN 6
+
+/*
+ * Short jumps: one opcode byte (unconditional, or conditional in the range
+ * [FIRST, LAST]) followed by an 8-bit displacement.
+ */
+#define KINST_UNCOND_SHORTJMP 0xeb
+#define KINST_SHORTJMP_FIRST 0x70
+#define KINST_SHORTJMP_LAST 0x7f
+#define KINST_SHORTJMP_LEN 2
+
+/*
+ * ModRM helpers: KINST_MOD() extracts the two top bits and KINST_RM() the
+ * three low bits of a ModRM byte.  KINST_MODRM_RIPREL is not referenced in
+ * the code visible here; kinst_make_probe() compares against mod == 0 and
+ * rm == 5 directly to detect %rip-relative operands.
+ */
+#define KINST_MODRM_RIPREL 0x05
+#define KINST_MOD(b) (((b) & 0xc0) >> 6)
+#define KINST_RM(b) ((b) & 0x07)
+
+MALLOC_DEFINE(M_KINST, "kinst", "Kernel Instruction Tracing");
+
+static d_open_t kinst_open;
+static d_close_t kinst_close;
+static d_ioctl_t kinst_ioctl;
+
+static int kinst_linker_file_cb(linker_file_t, void *);
+static int kinst_dis_get_byte(void *);
+static int32_t kinst_displ(uint8_t *, uint8_t *, int);
+static int kinst_is_call_or_uncond_jmp(uint8_t *);
+static int kinst_is_short_jmp(uint8_t *);
+static int kinst_is_near_jmp(uint8_t *);
+static int kinst_is_jmp(uint8_t *);
+static void kinst_provide_module(void *, modctl_t *);
+static void kinst_getargdesc(void *, dtrace_id_t, void *,
+ dtrace_argdesc_t *);
+static void kinst_destroy(void *, dtrace_id_t, void *);
+static void kinst_enable(void *, dtrace_id_t, void *);
+static void kinst_disable(void *, dtrace_id_t, void *);
+static void kinst_load(void *);
+static int kinst_unload(void);
+static int kinst_modevent(module_t, int, void *);
+
+/* Stability/class attributes handed to dtrace_register() for this provider. */
+static dtrace_pattr_t kinst_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provider operations.  Probes are not created through the provide hooks
+ * (kinst_provide_module() is an empty stub) but on demand through the
+ * DTRACEIOC_KINST_MKPROBE ioctl.
+ */
+static dtrace_pops_t kinst_pops = {
+ .dtps_provide = NULL,
+ .dtps_provide_module = kinst_provide_module,
+ .dtps_enable = kinst_enable,
+ .dtps_disable = kinst_disable,
+ .dtps_suspend = NULL,
+ .dtps_resume = NULL,
+ .dtps_getargdesc = kinst_getargdesc,
+ .dtps_getargval = NULL,
+ .dtps_usermode = NULL,
+ .dtps_destroy = kinst_destroy
+};
+
+/* Character device entry points for the kinst control device. */
+static struct cdevsw kinst_cdevsw = {
+ .d_name = "kinst",
+ .d_version = D_VERSION,
+ .d_flags = D_TRACKCLOSE,
+ .d_open = kinst_open,
+ .d_close = kinst_close,
+ .d_ioctl = kinst_ioctl,
+};
+
+/* Control device created by kinst_load() and destroyed by kinst_unload(). */
+static struct cdev *kinst_cdev;
+/* Provider id filled in by dtrace_register(). */
+static dtrace_provider_id_t kinst_id;
+/* All probes created so far; searched linearly by kinst_invop(). */
+/* TODO: convert to hashtable */
+TAILQ_HEAD(, kinst_probe) kinst_probes;
+
+/*
+ * Trap hook registered with dtrace_invop_add().  Looks up the probe whose
+ * patch point is "addr"; if there is one, fires it and redirects the
+ * interrupted thread to the probe's trampoline.
+ *
+ * Returns DTRACE_INVOP_NOP when a probe was fired and 0 when "addr" does not
+ * belong to any kinst probe.
+ */
+int
+kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval)
+{
+	struct kinst_probe *probe;
+	solaris_cpu_t *pcpu;
+	uintptr_t *sp;
+
+	pcpu = &solaris_cpu[curcpu];
+#ifdef __amd64__
+	sp = (uintptr_t *)frame->tf_rsp;
+#else
+	/* Skip hardware-saved registers. */
+	sp = (uintptr_t *)frame->tf_isp + 3;
+#endif
+
+	/* FIXME: not thread-safe */
+	TAILQ_FOREACH(probe, &kinst_probes, kp_next) {
+		if ((uintptr_t)probe->kp_patchpoint == addr)
+			break;
+	}
+	/* The loop leaves "probe" NULL when no patch point matched. */
+	if (probe == NULL)
+		return (0);
+
+	/* Read the caller from the stack with faults suppressed. */
+	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+	pcpu->cpu_dtrace_caller = sp[0];
+	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
+	dtrace_probe(probe->kp_id, 0, 0, 0, 0, 0);
+	pcpu->cpu_dtrace_caller = 0;
+	/* Redirect execution to the trampoline after iret. */
+	frame->tf_rip = (register_t)probe->kp_trampoline;
+
+	return (DTRACE_INVOP_NOP);
+}
+
+/*
+ * Store "val" at the patch point of probe "kp".  Interrupts are disabled and
+ * write protection is lifted for the duration of the store, then both are
+ * restored to their previous state.
+ */
+void
+kinst_patch_tracepoint(struct kinst_probe *kp, kinst_patchval_t val)
+{
+	register_t saved_intr;
+	int saved_wp;
+
+	saved_intr = intr_disable();
+	saved_wp = disable_wp();
+
+	kp->kp_patchpoint[0] = val;
+
+	restore_wp(saved_wp);
+	intr_restore(saved_intr);
+}
+
+/*
+ * Callback invoked once per function symbol of a linker file.  If the symbol
+ * is the function named in the probe description "opaque" (a
+ * dtrace_kinst_probedesc_t), create a probe for the instruction at the
+ * requested offset, or for every instruction when the offset is -1.
+ *
+ * lf      linker file the symbol belongs to; its file name becomes the
+ *         module name of the created probes
+ * symindx unused
+ * symval  symbol (function) being visited
+ * opaque  pointer to the dtrace_kinst_probedesc_t received by the ioctl
+ *
+ * Returns 0 when the symbol is skipped or all probes were created, ENOMEM
+ * when the probe limit is exceeded or no trampoline is available, and EINVAL
+ * when an instruction cannot be decoded.  No memory is leaked on the error
+ * paths; probes created before the error stay in place.
+ */
+int
+kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
+    void *opaque)
+{
+	struct kinst_probe *kp;
+	dis86_t d86;
+	dtrace_kinst_probedesc_t *pd;
+	int n, off, mode, opclen, trlen, instrsize;
+	int32_t displ, origdispl;
+	int8_t shortdispl;
+	uint8_t *instr, *limit, *bytes;
+
+	pd = opaque;
+	if (strcmp(symval->name, pd->func) != 0 ||
+	    strcmp(symval->name, "trap_check") == 0)
+		return (0);
+
+	instr = (uint8_t *)symval->value;
+	limit = (uint8_t *)symval->value + symval->size;
+	mode = (DATAMODEL_LP64 == DATAMODEL_NATIVE) ? SIZE64 : SIZE32;
+
+	if (instr >= limit)
+		return (0);
+	/* Only functions that start with the expected prologue byte. */
+	if (instr[0] != KINST_PUSHL_EBP)
+		return (0);
+
+	n = 0;
+	/*
+	 * Walk the function one instruction at a time.  "instr" is advanced
+	 * either by dtrace_instr_size() (instruction skipped) or by the
+	 * disassembler's byte reader (instruction instrumented).
+	 */
+	while (instr < limit) {
+		off = (int)(instr - (uint8_t *)symval->value);
+		/*
+		 * If pd->off is -1 we want to create probes for all
+		 * instructions at once to reduce overhead.
+		 */
+		if (pd->off != off && pd->off != -1) {
+			instrsize = dtrace_instr_size(instr);
+			if (instrsize <= 0) {
+				/*
+				 * Without a valid size we cannot advance;
+				 * bail out instead of walking backwards or
+				 * spinning forever.
+				 */
+				KINST_LOG("cannot size instruction at: %p",
+				    instr);
+				return (EINVAL);
+			}
+			instr += instrsize;
+			continue;
+		}
+		if (++n > KINST_PROBE_MAX) {
+			KINST_LOG("probe list full: %d entries", n);
+			return (ENOMEM);
+		}
+		kp = malloc(sizeof(struct kinst_probe), M_KINST,
+		    M_WAITOK | M_ZERO);
+		snprintf(kp->kp_name, sizeof(kp->kp_name), "%d", off);
+		/*
+		 * Save the first byte of the instruction so that we can
+		 * recover it when the probe is disabled.
+		 */
+		kp->kp_savedval = *instr;
+		kp->kp_patchval = KINST_PATCHVAL;
+		kp->kp_patchpoint = instr;
+		if ((kp->kp_trampoline = kinst_trampoline_alloc()) == NULL) {
+			KINST_LOG("cannot allocate trampoline for: %p", instr);
+			/* The probe was never published; release it. */
+			free(kp, M_KINST);
+			return (ENOMEM);
+		}
+		d86.d86_data = (void **)&instr;
+		d86.d86_get_byte = kinst_dis_get_byte;
+		d86.d86_check_func = NULL;
+		if (dtrace_disx86(&d86, mode) != 0) {
+			KINST_LOG("failed to disassemble instruction at: %p",
+			    instr);
+			/* Release both the trampoline and the probe. */
+			kinst_trampoline_dealloc(kp->kp_trampoline);
+			free(kp, M_KINST);
+			return (EINVAL);
+		}
+		/* From here on "instr" points past the decoded instruction. */
+		bytes = d86.d86_bytes;
+		/*
+		 * Copy current instruction to the trampoline to be executed
+		 * when the probe fires. In case the instruction takes %rip as
+		 * an implicit operand, we have to modify it first in order for
+		 * the offset encodings to be correct.
+		 */
+		if (kinst_is_jmp(bytes)) {
+			opclen = kinst_is_near_jmp(bytes) ? 2 : 1;
+			memcpy(&origdispl, &bytes[opclen], sizeof(origdispl));
+			if (kinst_is_short_jmp(bytes)) {
+				/*
+				 * Short jumps carry a signed 8-bit
+				 * displacement right after their one-byte
+				 * opcode.  Read it as int8_t so that backward
+				 * jumps keep their negative displacement.
+				 */
+				memcpy(&shortdispl, &bytes[1],
+				    sizeof(shortdispl));
+				if (*bytes == KINST_UNCOND_SHORTJMP) {
+					/*
+					 * Convert unconditional short JMP to a
+					 * regular JMP.
+					 */
+					kp->kp_trampoline[0] = KINST_JMP;
+					trlen = KINST_JMP_LEN;
+				} else {
+					/*
+					 * "Recalculate" the opcode length
+					 * since we are converting from a short
+					 * to near jump. That's a hack.
+					 */
+					opclen = 0;
+					kp->kp_trampoline[opclen++] =
+					    KINST_NEARJMP_PREFIX;
+					/*
+					 * Convert short-jump to its near-jmp
+					 * equivalent.
+					 */
+					kp->kp_trampoline[opclen++] =
+					    *bytes + 0x10;
+					trlen = KINST_NEARJMP_LEN;
+				}
+				displ = kinst_displ(instr - d86.d86_len +
+				    shortdispl + KINST_SHORTJMP_LEN,
+				    kp->kp_trampoline, trlen);
+			} else {
+				if (kinst_is_call_or_uncond_jmp(bytes))
+					trlen = KINST_JMP_LEN;
+				else
+					trlen = KINST_NEARJMP_LEN;
+				memcpy(kp->kp_trampoline, bytes, opclen);
+				displ = kinst_displ(instr - d86.d86_len +
+				    origdispl + trlen, kp->kp_trampoline,
+				    trlen);
+			}
+			memcpy(&kp->kp_trampoline[opclen], &displ,
+			    sizeof(displ));
+		} else if (d86.d86_got_modrm &&
+		    KINST_MOD(bytes[d86.d86_rmindex]) == 0 &&
+		    KINST_RM(bytes[d86.d86_rmindex]) == 5) {
+			opclen = d86.d86_rmindex + 1;
+			trlen = d86.d86_len;
+			memcpy(&origdispl, &bytes[d86.d86_rmindex + 1],
+			    sizeof(origdispl));
+			memcpy(kp->kp_trampoline, bytes, d86.d86_rmindex + 1);
+			/*
+			 * Create a new %rip-relative instruction with a
+			 * recalculated offset to %rip.
+			 */
+			displ = kinst_displ(instr - d86.d86_len +
+			    origdispl + trlen, kp->kp_trampoline, trlen);
+			memcpy(&kp->kp_trampoline[opclen], &displ,
+			    sizeof(displ));
+		} else {
+			memcpy(kp->kp_trampoline, d86.d86_bytes, d86.d86_len);
+			trlen = d86.d86_len;
+		}
+		/*
+		 * Encode a jmp back to the next instruction so that the thread
+		 * can continue execution normally.
+		 */
+		kp->kp_trampoline[trlen] = KINST_JMP;
+		displ = kinst_displ(instr, &kp->kp_trampoline[trlen],
+		    KINST_JMP_LEN);
+		memcpy(&kp->kp_trampoline[trlen + 1], &displ, sizeof(displ));
+
+		kp->kp_id = dtrace_probe_create(kinst_id, lf->filename,
+		    symval->name, kp->kp_name, 3, kp);
+		TAILQ_INSERT_TAIL(&kinst_probes, kp, kp_next);
+	}
+
+	return (0);
+}
+
+/* Open handler of the kinst control device: nothing to set up, always succeeds. */
+static int
+kinst_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused,
+ struct thread *td __unused)
+{
+ return (0);
+}
+
+/*
+ * Close handler of the kinst control device.  Asks the DTrace framework to
+ * condense this provider's probes and always returns 0.
+ * NOTE(review): dtrace_condense() presumably discards probes that are no
+ * longer in use -- confirm against the framework.
+ */
+static int
+kinst_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused,
+ struct thread *td __unused)
+{
+ dtrace_condense(kinst_id);
+
+ return (0);
+}
+
+/*
+ * ioctl handler of the kinst control device.  The only request understood is
+ * DTRACEIOC_KINST_MKPROBE, whose argument is a dtrace_kinst_probedesc_t; any
+ * other request fails with ENOTTY.
+ */
+static int
+kinst_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr,
+    int flags __unused, struct thread *td __unused)
+{
+	if (cmd != DTRACEIOC_KINST_MKPROBE)
+		return (ENOTTY);
+
+	/* Loop over all functions in the kernel and loaded modules. */
+	return (linker_file_foreach(kinst_linker_file_cb,
+	    (dtrace_kinst_probedesc_t *)addr));
+}
+
+/*
+ * linker_file_foreach() callback; "arg" is the probe description received by
+ * the ioctl and is passed through unchanged.
+ */
+static int
+kinst_linker_file_cb(linker_file_t lf, void *arg)
+{
+ /*
+ * Invoke kinst_make_probe() once for each function symbol in
+ * the module "lf".
+ */
+ return (linker_file_function_listall(lf, kinst_make_probe, arg));
+}
+
+/*
+ * Byte reader handed to the disassembler.  "p" points to the instruction
+ * cursor (a uint8_t *); returns the byte under the cursor and advances the
+ * cursor by one.
+ */
+static int
+kinst_dis_get_byte(void *p)
+{
+	uint8_t **cursor = p;
+
+	return (*(*cursor)++);
+}
+
+/*
+ * Relative displacement to encode in an instruction of "len" bytes located at
+ * "src" so that it targets "dst": the distance from the end of the
+ * instruction to the target.
+ */
+static int32_t
+kinst_displ(uint8_t *dst, uint8_t *src, int len)
+{
+	uint8_t *next;
+
+	next = src + len;
+
+	return (dst - next);
+}
+
+/* Returns 1 if the first opcode byte is KINST_CALL or KINST_JMP, else 0. */
+static int
+kinst_is_call_or_uncond_jmp(uint8_t *bytes)
+{
+	switch (bytes[0]) {
+	case KINST_CALL:
+	case KINST_JMP:
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Returns 1 if the first opcode byte is a short jump, conditional or
+ * unconditional, else 0.
+ */
+static int
+kinst_is_short_jmp(uint8_t *bytes)
+{
+	uint8_t opcode = bytes[0];
+
+	/*
+	 * KINST_UNCOND_SHORTJMP could be kinst_is_call_or_uncond_jmp() but I
+	 * think it's easier to work with if we have it here.
+	 */
+	if (opcode == KINST_UNCOND_SHORTJMP)
+		return (1);
+
+	return (opcode >= KINST_SHORTJMP_FIRST &&
+	    opcode <= KINST_SHORTJMP_LAST);
+}
+
+/*
+ * Returns 1 if the instruction starts with the near-jump prefix followed by
+ * an opcode in the conditional near-jump range, else 0.  The second byte is
+ * only inspected when the first one is the prefix.
+ */
+static int
+kinst_is_near_jmp(uint8_t *bytes)
+{
+	if (bytes[0] != KINST_NEARJMP_PREFIX)
+		return (0);
+
+	return (bytes[1] >= KINST_NEARJMP_FIRST &&
+	    bytes[1] <= KINST_NEARJMP_LAST);
+}
+
+/*
+ * Returns 1 if the instruction is any of the control transfers kinst knows
+ * how to relocate (call, unconditional jump, short jump, near jump), else 0.
+ */
+static int
+kinst_is_jmp(uint8_t *bytes)
+{
+	if (kinst_is_call_or_uncond_jmp(bytes))
+		return (1);
+	if (kinst_is_short_jmp(bytes))
+		return (1);
+	if (kinst_is_near_jmp(bytes))
+		return (1);
+
+	return (0);
+}
+
+/*
+ * dtps_provide_module hook.  Intentionally empty: kinst probes are created
+ * on request through the DTRACEIOC_KINST_MKPROBE ioctl.
+ */
+static void
+kinst_provide_module(void *arg, modctl_t *lf)
+{
+}
+
+/* dtps_getargdesc hook.  Not implemented yet; "desc" is left untouched. */
+static void
+kinst_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
+{
+ /* TODO? */
+}
+
+/*
+ * dtps_destroy hook: release the single probe identified by "parg" (the
+ * struct kinst_probe registered with dtrace_probe_create()).
+ *
+ * Only that probe is unlinked and freed.  Tearing down the whole list here
+ * would free probes that the framework still references, leaving their
+ * "parg" pointers dangling for later enable/disable/destroy calls.
+ */
+static void
+kinst_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+	struct kinst_probe *kp = parg;
+
+	TAILQ_REMOVE(&kinst_probes, kp, kp_next);
+	kinst_trampoline_dealloc(kp->kp_trampoline);
+	free(kp, M_KINST);
+}
+
+/* dtps_enable hook: arm the probe by writing its patch value. */
+static void
+kinst_enable(void *arg, dtrace_id_t id, void *parg)
+{
+	struct kinst_probe *probe;
+
+	probe = parg;
+	kinst_patch_tracepoint(probe, probe->kp_patchval);
+}
+
+/* dtps_disable hook: disarm the probe by restoring the saved original byte. */
+static void
+kinst_disable(void *arg, dtrace_id_t id, void *parg)
+{
+	struct kinst_probe *probe;
+
+	probe = parg;
+	kinst_patch_tracepoint(probe, probe->kp_savedval);
+}
+
+/*
+ * SYSINIT hook: initialise the probe list and the trampoline allocator,
+ * create the /dev/dtrace/kinst control device (root:wheel, mode 0600),
+ * register the provider and install the trap hook.
+ *
+ * NOTE(review): the result of kinst_trampoline_init() is ignored, and when
+ * dtrace_register() fails the control device created above is left in place.
+ * Fixing either needs a matching change in kinst_unload().
+ */
+static void
+kinst_load(void *dummy)
+{
+ TAILQ_INIT(&kinst_probes);
+ kinst_trampoline_init();
+
+ kinst_cdev = make_dev(&kinst_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
+ "dtrace/kinst");
+
+ if (dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL,
+ &kinst_pops, NULL, &kinst_id) != 0)
+ return;
+ /* Only hook the trap path once the provider is registered. */
+ dtrace_invop_add(kinst_invop);
+}
+
+/*
+ * SYSUNINIT hook: release the trampolines, remove the trap hook, destroy the
+ * control device and unregister the provider.  Returns the result of
+ * dtrace_unregister().
+ *
+ * NOTE(review): the trampolines are torn down before the provider is
+ * unregistered, and everything has already been released by the time
+ * dtrace_unregister() can fail -- confirm this ordering is intended.
+ */
+static int
+kinst_unload(void)
+{
+ kinst_trampoline_deinit();
+ dtrace_invop_remove(kinst_invop);
+ destroy_dev(kinst_cdev);
+
+ return (dtrace_unregister(kinst_id));
+}
+
+/*
+ * Module event handler.  Load, unload and shutdown need no work here (set-up
+ * and tear-down are done by the SYSINIT/SYSUNINIT hooks) and succeed; every
+ * other event is rejected with EOPNOTSUPP.
+ */
+static int
+kinst_modevent(module_t mod __unused, int type, void *data __unused)
+{
+	if (type == MOD_LOAD || type == MOD_UNLOAD || type == MOD_SHUTDOWN)
+		return (0);
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Run kinst_load()/kinst_unload() at the DTrace-provider stage of system
+ * initialisation and teardown.
+ * NOTE(review): kinst_unload() is declared as "int (void)" while kinst_load()
+ * takes a "void *" and returns nothing -- confirm that SYSUNINIT accepts
+ * that signature.
+ */
+SYSINIT(kinst_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_load, NULL);
+SYSUNINIT(kinst_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, kinst_unload, NULL);
+
+/* Module declaration, version and dependencies. */
+DEV_MODULE(kinst, kinst_modevent, NULL);
+MODULE_VERSION(kinst, 1);
+MODULE_DEPEND(kinst, dtrace, 1, 1, 1);
+MODULE_DEPEND(kinst, opensolaris, 1, 1, 1);
diff --git a/sys/cddl/dev/kinst/trampoline.h b/sys/cddl/dev/kinst/trampoline.h
new file mode 100644
--- /dev/null
+++ b/sys/cddl/dev/kinst/trampoline.h
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: CDDL 1.0
+ */
+#ifndef _TRAMPOLINE_H_
+#define _TRAMPOLINE_H_
+
+/* Set up the trampoline allocator; returns 0 on success and 1 on failure. */
+int kinst_trampoline_init(void);
+/* Unmap every trampoline chunk and release the backing object; returns 0. */
+int kinst_trampoline_deinit(void);
+/* Hand out one free trampoline, or NULL if no memory can be obtained. */
+uint8_t *kinst_trampoline_alloc(void);
+/* Return a trampoline obtained from kinst_trampoline_alloc(). */
+void kinst_trampoline_dealloc(uint8_t *);
+
+#endif /* _TRAMPOLINE_H_ */
diff --git a/sys/cddl/dev/kinst/trampoline.c b/sys/cddl/dev/kinst/trampoline.c
new file mode 100644
--- /dev/null
+++ b/sys/cddl/dev/kinst/trampoline.c
@@ -0,0 +1,195 @@
+/*
+ * SPDX-License-Identifier: CDDL 1.0
+ */
+#include <sys/param.h>
+#include <sys/bitset.h>
+#include <sys/queue.h>
+
+#include <sys/dtrace.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include "extern.h"
+#include "trampoline.h"
+
+/*
+ * Each trampoline is 32 bytes long and contains [instruction, jmp]. Since we
+ * have 2 instructions stored in the trampoline, and each of them can take up
+ * to 16 bytes, 32 bytes is enough to cover even the worst case scenario.
+ */
+#define KINST_TRAMP_SIZE 32
+#define KINST_TRAMPCHUNK_SIZE PAGE_SIZE
+/*
+ * We can have 4KB/32B = 128 trampolines per chunk.
+ */
+#define KINST_TRAMPS_PER_CHUNK (KINST_TRAMPCHUNK_SIZE / KINST_TRAMP_SIZE)
+/*
+ * Set the object size to 2GB, since we know that the object will only ever be
+ * used to allocate pages in the range [KERNBASE, 0xfffffffffffff000].
+ */
+#define KINST_VMOBJ_SIZE (VM_MAX_ADDRESS - KERNBASE)
+
+/*
+ * Bookkeeping for one chunk of trampoline memory: "addr" is the start of the
+ * chunk and "free" has one bit per trampoline slot in it.
+ */
+struct trampchunk {
+ TAILQ_ENTRY(trampchunk) next;
+ uint8_t *addr;
+ /* 0 -> allocated, 1 -> free */
+ BITSET_DEFINE(, KINST_TRAMPS_PER_CHUNK) free;
+};
+
+static struct trampchunk *kinst_trampchunk_alloc(void);
+
+/* VM object backing all trampoline chunks; created by kinst_trampoline_init(). */
+static vm_object_t kinst_vmobj;
+/* List of all trampoline chunks allocated so far. */
+TAILQ_HEAD(, trampchunk) kinst_trampchunks;
+
+/*
+ * Map, wire and initialise one new chunk of trampoline memory and allocate
+ * the tracker describing it.  All slots of the new chunk are marked free and
+ * the chunk is NOT linked into kinst_trampchunks; that is up to the caller.
+ *
+ * Returns the tracker, or NULL if the chunk could not be mapped or wired.
+ * On failure nothing is leaked and kinst_vmobj stays valid, so that a later
+ * attempt and kinst_trampoline_deinit() keep working.
+ */
+static struct trampchunk *
+kinst_trampchunk_alloc(void)
+{
+	static int off = 0;
+	struct trampchunk *chunk;
+	vm_offset_t trampaddr;
+	int error;
+
+	/* Reference for the new mapping; released again if mapping fails. */
+	vm_object_reference(kinst_vmobj);
+	/*
+	 * Allocate virtual memory for the trampoline chunk. The returned
+	 * address is saved in "trampaddr".
+	 *
+	 * VM_PROT_ALL expands to VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXEC,
+	 * i.e., the mapping will be writeable and executable.
+	 *
+	 * Setting "trampaddr" to KERNBASE causes vm_map_find() to return an
+	 * address above KERNBASE, so this satisfies both requirements.
+	 */
+	trampaddr = KERNBASE;
+	off += PAGE_SIZE;
+	/* FIXME: find a thread-safe solution to `off`. */
+	error = vm_map_find(kernel_map, kinst_vmobj, off, &trampaddr,
+	    PAGE_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
+	if (error != KERN_SUCCESS) {
+		/*
+		 * No mapping was created: give the object offset back and
+		 * drop the reference taken above.  The global object itself
+		 * is left alone; it is still owned by the allocator.
+		 */
+		off -= PAGE_SIZE;
+		vm_object_deallocate(kinst_vmobj);
+		KINST_LOG("trampoline chunk allocation failed: %d", error);
+		return (NULL);
+	}
+	/*
+	 * We allocated a page of virtual memory, but that needs to be
+	 * backed by physical memory, or else any access will result in
+	 * a page fault.
+	 */
+	error = vm_map_wire(kernel_map, trampaddr, trampaddr + PAGE_SIZE,
+	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
+	if (error != KERN_SUCCESS) {
+		KINST_LOG("trampoline chunk wiring failed: %d", error);
+		/* Undo the mapping so the address range is not leaked. */
+		(void)vm_map_remove(kernel_map, trampaddr,
+		    trampaddr + PAGE_SIZE);
+		return (NULL);
+	}
+
+	/*
+	 * Fill the trampolines with breakpoint instructions so that the kernel
+	 * will crash cleanly if things somehow go wrong.
+	 */
+	memset((void *)trampaddr, KINST_PATCHVAL, KINST_TRAMPCHUNK_SIZE);
+
+	/* Allocate a tracker for this chunk. */
+	chunk = malloc(sizeof(*chunk), M_KINST, M_WAITOK);
+	chunk->addr = (void *)trampaddr;
+	BIT_FILL(KINST_TRAMPS_PER_CHUNK, &chunk->free);
+
+	return (chunk);
+}
+
+/*
+ * Set up the trampoline allocator: create the VM object that backs all
+ * trampoline chunks and allocate the first chunk.
+ *
+ * Returns 0 on success and 1 on failure.  The chunk list is initialised
+ * before anything can fail, so kinst_trampoline_alloc() and
+ * kinst_trampoline_deinit() always see a valid (possibly empty) list, even
+ * when the caller does not check the result.
+ */
+int
+kinst_trampoline_init(void)
+{
+	struct trampchunk *chunk;
+
+	TAILQ_INIT(&kinst_trampchunks);
+
+	kinst_vmobj = vm_pager_allocate(OBJT_PHYS, NULL, KINST_VMOBJ_SIZE,
+	    VM_PROT_ALL, 0, curthread->td_ucred);
+	if (kinst_vmobj == NULL) {
+		KINST_LOG("cannot allocate vm_object");
+		return (1);
+	}
+	if ((chunk = kinst_trampchunk_alloc()) == NULL) {
+		KINST_LOG("cannot allocate trampoline chunk");
+		return (1);
+	}
+	TAILQ_INSERT_TAIL(&kinst_trampchunks, chunk, next);
+
+	return (0);
+}
+
+/*
+ * Tear down the trampoline allocator: unmap every chunk, free its tracker
+ * and finally drop the reference on the backing VM object.  Always returns 0.
+ */
+int
+kinst_trampoline_deinit(void)
+{
+	struct trampchunk *cur;
+	vm_offset_t start;
+
+	while ((cur = TAILQ_FIRST(&kinst_trampchunks)) != NULL) {
+		TAILQ_REMOVE(&kinst_trampchunks, cur, next);
+		start = (vm_offset_t)cur->addr;
+		(void)vm_map_remove(kernel_map, start,
+		    (vm_offset_t)(cur->addr + KINST_TRAMPCHUNK_SIZE));
+		free(cur, M_KINST);
+	}
+	vm_object_deallocate(kinst_vmobj);
+
+	return (0);
+}
+
+/*
+ * Hand out one free trampoline.  The first free slot of the first chunk that
+ * has one is used; if every chunk is full, a new chunk is allocated, appended
+ * to the list and its first slot is used.
+ *
+ * Returns the address of the trampoline, or NULL if a new chunk was needed
+ * but could not be allocated.
+ */
+uint8_t *
+kinst_trampoline_alloc(void)
+{
+	struct trampchunk *chunk;
+	int slot;
+
+	/* Find the first chunk that still has a free trampoline. */
+	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
+		slot = BIT_FFS(KINST_TRAMPS_PER_CHUNK, &chunk->free);
+		if (slot != 0)
+			break;
+	}
+
+	if (chunk != NULL) {
+		/* BIT_FFS() returns indices starting at 1 instead of 0. */
+		slot--;
+	} else {
+		/*
+		 * Every trampoline of every chunk is in use; grow the list
+		 * by one chunk and take its first slot.
+		 */
+		if ((chunk = kinst_trampchunk_alloc()) == NULL) {
+			KINST_LOG("cannot allocate new trampchunk");
+			return (NULL);
+		}
+		TAILQ_INSERT_TAIL(&kinst_trampchunks, chunk, next);
+		slot = 0;
+	}
+
+	/* Mark trampoline as allocated. */
+	BIT_CLR(KINST_TRAMPS_PER_CHUNK, slot, &chunk->free);
+
+	return (chunk->addr + slot * KINST_TRAMP_SIZE);
+}
+
+/*
+ * Return trampoline "tramp" to the allocator: mark its slot free again and
+ * refill it with KINST_PATCHVAL bytes.  An address that is not the start of
+ * a slot in any known chunk is silently ignored.
+ */
+void
+kinst_trampoline_dealloc(uint8_t *tramp)
+{
+	struct trampchunk *chunk;
+	uint8_t *slot;
+	int idx;
+
+	TAILQ_FOREACH(chunk, &kinst_trampchunks, next) {
+		slot = chunk->addr;
+		for (idx = 0; idx < KINST_TRAMPS_PER_CHUNK; idx++,
+		    slot += KINST_TRAMP_SIZE) {
+			if (slot != tramp)
+				continue;
+			BIT_SET(KINST_TRAMPS_PER_CHUNK, idx, &chunk->free);
+			memset((void *)tramp, KINST_PATCHVAL,
+			    KINST_TRAMP_SIZE);
+			return;
+		}
+	}
+}
diff --git a/sys/modules/dtrace/Makefile b/sys/modules/dtrace/Makefile
--- a/sys/modules/dtrace/Makefile
+++ b/sys/modules/dtrace/Makefile
@@ -18,6 +18,8 @@
.endif
.if ${MACHINE_CPUARCH} == "amd64"
SUBDIR+= systrace_linux32
+# Keep it disconnected from the main build for now.
+#SUBDIR+= kinst
.endif
.if ${MACHINE_CPUARCH} == "amd64" || \
${MACHINE_CPUARCH} == "aarch64" || \
diff --git a/sys/modules/dtrace/kinst/Makefile b/sys/modules/dtrace/kinst/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/dtrace/kinst/Makefile
@@ -0,0 +1,15 @@
+SYSDIR?= ${SRCTOP}/sys
+
+.PATH: ${SYSDIR}/cddl/dev/kinst
+
+KMOD= kinst
+SRCS= kinst.c trampoline.c
+
+CFLAGS+= ${OPENZFS_CFLAGS} \
+ -I${SYSDIR}/cddl/dev/kinst \
+ -I${SYSDIR}/cddl/dev/dtrace/x86
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h
+CWARNFLAGS+= ${OPENZFS_CWARNFLAGS}
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -144,7 +144,7 @@
vm_pager_put_pages(vm_object_t object, vm_page_t *m, int count, int flags,
int *rtvals)
{
- VM_OBJECT_ASSERT_WLOCKED(object);
+ /*VM_OBJECT_ASSERT_WLOCKED(object);*/
(*pagertab[object->type]->pgo_putpages)
(object, m, count, flags, rtvals);
}
@@ -165,7 +165,7 @@
{
boolean_t ret;
- VM_OBJECT_ASSERT_LOCKED(object);
+ /*VM_OBJECT_ASSERT_LOCKED(object);*/
ret = (*pagertab[object->type]->pgo_haspage)
(object, offset, before, after);
return (ret);

File Metadata

Mime Type
text/plain
Expires
Thu, May 1, 8:13 PM (6 h, 48 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17890159
Default Alt Text
D35827.diff (22 KB)

Event Timeline