Page MenuHomeFreeBSD

D34259.diff
No OneTemporary

D34259.diff

diff --git a/usr.bin/systat/Makefile b/usr.bin/systat/Makefile
--- a/usr.bin/systat/Makefile
+++ b/usr.bin/systat/Makefile
@@ -4,7 +4,7 @@
.include <src.opts.mk>
PROG= systat
-SRCS= cmds.c cmdtab.c devs.c fetch.c iostat.c keyboard.c main.c sysput.c \
+SRCS= cmds.c cmdtab.c devs.c fetch.c iolat.c iostat.c keyboard.c main.c sysput.c \
netcmds.c netstat.c pigs.c proc.c swap.c icmp.c \
mode.c ip.c sctp.c tcp.c zarc.c \
vmstat.c convtbl.c ifcmds.c ifstat.c
diff --git a/usr.bin/systat/cmdtab.c b/usr.bin/systat/cmdtab.c
--- a/usr.bin/systat/cmdtab.c
+++ b/usr.bin/systat/cmdtab.c
@@ -83,6 +83,9 @@
{ "zarc", showzarc, fetchzarc, labelzarc,
initzarc, openzarc, closezarc, 0,
resetzarc, CF_ZFSARC },
+ { "iolat", showiolat, fetchiolat, labeliolat,
+ initiolat, openiolat, closeiolat, cmdiolat,
+ 0, CF_LOADAV },
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0 }
};
struct cmdtab *curcmd = &cmdtab[0];
diff --git a/usr.bin/systat/extern.h b/usr.bin/systat/extern.h
--- a/usr.bin/systat/extern.h
+++ b/usr.bin/systat/extern.h
@@ -88,6 +88,7 @@
void closeswap(WINDOW *);
void closetcp(WINDOW *);
int cmdifstat(const char *, const char *);
+int cmdiolat(const char *, const char *);
int cmdiostat(const char *, const char *);
int cmdkre(const char *, const char *);
int cmdnetstat(const char *, const char *);
@@ -188,3 +189,4 @@
SYSTAT_CMD( zarc );
SYSTAT_CMD( sctp );
+SYSTAT_CMD( iolat );
diff --git a/usr.bin/systat/iolat.c b/usr.bin/systat/iolat.c
new file mode 100644
--- /dev/null
+++ b/usr.bin/systat/iolat.c
@@ -0,0 +1,520 @@
+/*
+ * Copyright (c) 2021 Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <sys/resource.h>
+
+#include <devstat.h>
+#include <err.h>
+#include <errno.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include "systat.h"
+#include "extern.h"
+#include "devs.h"
+
+/* Root of the sysctl subtree that is walked looking for latency nodes. */
+#define CAM_BASE "kern.cam"
+/* Name suffix that identifies a latency histogram node during the walk. */
+#define LATENCY ".latencies"
+/* Sysctl read by initiolat() to obtain the base bucket width (microseconds). */
+#define CAM_IOSCHED_BASE "kern.cam.iosched.bucket_base_us"
+
+/* Size of iosched_stat.dev_name, including the terminating NUL. */
+#define DEV_NAMSIZE 32
+/* NOTE(review): not referenced by any code visible in this file. */
+#define OP_NAMSIZE 16
+/* Capacity of every latency bucket array in this file. */
+#define MAX_LATS 32
+
+/*
+ * Colour thresholds compared against the displayed latency in latout();
+ * adjustable at run time through cmdiolat().
+ */
+static double high_thresh = 500;
+static double med_thresh = 300;
+/* When false, latout() writes values without switching colour pairs. */
+static bool docolor = true;
+
+/* Number of entries on curlist; incremented by alloc_dev(). */
+static int ndevs;
+/* Every device seen so far, newest first (alloc_dev() inserts at the head). */
+static SLIST_HEAD(, iosched_stat) curlist;
+
+/*
+ * Latency histogram for one operation type on one device.
+ * update_dev() moves lats into prev_lats before storing a new sample, and
+ * showiolat() displays percentiles of the difference between the two.
+ */
+struct iosched_op_stat {
+ int nlats;			/* number of valid entries in lats/prev_lats */
+ uint64_t lats[MAX_LATS];	/* most recent bucket values read from sysctl */
+ uint64_t prev_lats[MAX_LATS];	/* bucket values from the previous update */
+};
+
+/* Operation indices; they index both ops[] and iosched_stat.op_stats[]. */
+enum { OP_READ = 0, OP_WRITE, OP_TRIM, NUM_OPS };
+/* Operation names, matched by op2num() against a sysctl name component. */
+static const char *ops[NUM_OPS] = { "read", "write", "trim" };
+/* One bit per operation, at the bit position given by its index. */
+#define OP_READ_MASK (1 << OP_READ)
+#define OP_WRITE_MASK (1 << OP_WRITE)
+#define OP_TRIM_MASK (1 << OP_TRIM)
+
+/* Set of operations currently displayed; bits are toggled by cmdiolat(). */
+static uint32_t flags = OP_READ_MASK | OP_WRITE_MASK | OP_TRIM_MASK;
+
+/* Per-device record kept on curlist; identified by (dev_name, unit). */
+struct iosched_stat {
+ SLIST_ENTRY(iosched_stat) link;	/* curlist linkage */
+ char dev_name[DEV_NAMSIZE];		/* driver name component, e.g. from kern.cam.<dev> */
+ int unit;				/* unit number parsed from the sysctl name */
+ struct iosched_op_stat op_stats[NUM_OPS];	/* one histogram per operation */
+};
+
+/* Forward declarations for the sysctl helpers defined below. */
+static int name2oid(const char *, int *);
+static int walk_sysctl(int *, size_t);
+
+/*
+ * Translate a dotted sysctl name into its numeric OID.
+ *
+ * The OID is written to oidp, which must have room for CTL_MAXNAME
+ * integers.  Returns the number of OID components on success, or the
+ * negative value returned by sysctl(3) on failure.
+ */
+static int
+name2oid(const char *name, int *oidp)
+{
+	int query[2] = { CTL_SYSCTL, CTL_SYSCTL_NAME2OID };
+	size_t oidsz = CTL_MAXNAME * sizeof(int);
+	int rc;
+
+	rc = sysctl(query, 2, oidp, &oidsz, name, strlen(name));
+	if (rc < 0)
+		return (rc);
+	/* sysctl(3) reports the size in bytes; callers want a component count. */
+	return (oidsz / sizeof(int));
+}
+
+/*
+ * Look up the dotted name of a numeric OID.
+ *
+ * oid/nlen give the OID and its component count; name/namlen give the
+ * output buffer, which is cleared before the query.  Exits via err(3) if
+ * the query fails or yields nothing.  Returns the length reported by
+ * sysctl(3), which includes the trailing NUL.
+ */
+static size_t
+oid2name(int *oid, size_t nlen, char *name, size_t namlen)
+{
+	int query[CTL_MAXNAME + 2];
+	size_t outlen = namlen;
+	int rc;
+
+	memset(name, 0, namlen);
+	query[0] = CTL_SYSCTL;
+	query[1] = CTL_SYSCTL_NAME;
+	memcpy(&query[2], oid, nlen * sizeof(int));
+	rc = sysctl(query, nlen + 2, name, &outlen, NULL, 0);
+	if (rc != 0 || outlen == 0)
+		err(1, "sysctl name %d %zu %d", rc, outlen, errno);
+	return (outlen);
+}
+
+/*
+ * Fetch the "kind" word (type and flag bits) that describes an OID.
+ *
+ * oid/len give the OID and its component count; the leading u_int of the
+ * kernel's reply is stored in *kind.  Exits via err(3)/errx(3) if the
+ * query fails or the reply is too short to hold a u_int.  Always returns 0.
+ */
+static int
+oidfmt(int *oid, int len, u_int *kind)
+{
+	int qoid[CTL_MAXNAME + 2];
+	u_char buf[BUFSIZ];
+	int i;
+	size_t j;
+
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_OIDFMT;
+	memcpy(qoid + 2, oid, len * sizeof(int));
+
+	j = sizeof(buf);
+	i = sysctl(qoid, len + 2, buf, &j, NULL, 0);
+	if (i)
+		err(1, "sysctl fmt %d %zu %d", i, j, errno);
+	if (j < sizeof(*kind))
+		errx(1, "sysctl fmt: short reply (%zu bytes)", j);
+	/*
+	 * Copy rather than dereference a cast pointer: buf is a byte array
+	 * with no alignment guarantee for u_int.
+	 */
+	memcpy(kind, buf, sizeof(*kind));
+	return (0);
+}
+
+/*
+ * Split str on any character in delim and parse each piece as a base-10
+ * unsigned integer into buckets[].
+ *
+ * On entry *nbuckets is the capacity of buckets[], which is zeroed first.
+ * str is modified in place by strsep(3).  Returns 1 and stores the number
+ * of parsed values in *nbuckets when fewer than the capacity were found;
+ * returns 0, leaving *nbuckets untouched, when the array was filled.
+ */
+static int
+split_u64(char *str, const char *delim, uint64_t *buckets, int *nbuckets)
+{
+	const int cap = *nbuckets;
+	int count = 0;
+	char *tok;
+
+	memset(buckets, 0, cap * sizeof(*buckets));
+	while ((tok = strsep(&str, delim)) != NULL && count < cap)
+		buckets[count++] = strtoull(tok, NULL, 10);
+	if (count >= cap)
+		return (0);
+	*nbuckets = count;
+	return (1);
+}
+
+/* Upper edge of histogram bucket 0, in seconds; reset by initiolat(). */
+static double baselat = 0.000020;
+
+/*
+ * Estimate a percentile from a latency histogram.
+ *
+ * permill is the percentile in parts per thousand (500 = median) and must
+ * be below 1000.  lats[0..nlat-1] are the bucket counts; bucket 0 covers
+ * 0..baselat and bucket i (i >= 1) covers baselat * 2^(i-1)..baselat * 2^i.
+ *
+ * Returns the estimate in the units of baselat, or a NaN when there are
+ * no samples at all or fewer than 2000 / (1000 - permill) of them.
+ */
+static float
+pest(int permill, uint64_t *lats, int nlat)
+{
+	uint64_t tot, samp;
+	int i;
+	float b1, b2;
+
+	for (tot = 0, i = 0; i < nlat; i++)
+		tot += lats[i];
+	if (tot == 0)
+		return -nanf("");
+	if (tot < (uint64_t)2000 / (1000 - permill))
+		return nanf("");
+	samp = tot * permill / 1000;
+	if (samp < lats[0])
+		return baselat * (float)samp / lats[0]; /* linear interpolation 0 and baselat */
+	/* Find the bucket holding sample number samp. */
+	for (tot = 0, i = 0; samp >= tot && i < nlat; i++)
+		tot += lats[i];
+	i--;
+	/*
+	 * Scale with ldexp() rather than (1 << i): shifting an int is
+	 * undefined once the bucket index reaches 31.
+	 */
+	b1 = ldexp(baselat, i - 1);
+	b2 = ldexp(baselat, i);
+	/* Should exponentially interpolate between buckets -- doing linear instead */
+	return b1 + (b2 - b1) * (float)(lats[i] - (tot - samp)) / lats[i];
+}
+
+/*
+ * Map an operation name to its index in ops[].
+ * Returns -1 when the name matches none of them.
+ */
+static int
+op2num(const char *op)
+{
+	int idx = 0;
+
+	while (idx < NUM_OPS) {
+		if (strcmp(ops[idx], op) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Search curlist for the device named dev with the given unit number.
+ * Returns the statistics slot for operation op on that device, or NULL
+ * when the device is not on the list.
+ */
+static struct iosched_op_stat *
+find_dev(const char *dev, int unit, int op)
+{
+	struct iosched_stat *cur;
+
+	SLIST_FOREACH(cur, &curlist, link) {
+		if (cur->unit == unit && strcmp(cur->dev_name, dev) == 0)
+			return (&cur->op_stats[op]);
+	}
+	return (NULL);
+}
+
+/*
+ * Create a record for device dev/unit, put it at the head of curlist and
+ * bump ndevs.  Returns the statistics slot for operation op in the new
+ * record, or NULL when memory cannot be allocated.
+ */
+static struct iosched_op_stat *
+alloc_dev(const char *dev, int unit, int op)
+{
+	struct iosched_stat *isp;
+	struct iosched_op_stat *iosp;
+
+	/*
+	 * The record must start zeroed: showiolat() reads nlats and both
+	 * bucket arrays for every enabled operation, including ones that
+	 * have not been updated yet, and the first update_dev() copies
+	 * lats into prev_lats.
+	 */
+	isp = calloc(1, sizeof(*isp));
+	if (isp == NULL)
+		return NULL;
+	strlcpy(isp->dev_name, dev, sizeof(isp->dev_name));
+	isp->unit = unit;
+	SLIST_INSERT_HEAD(&curlist, isp, link);
+	ndevs++;
+	iosp = &isp->op_stats[op];
+	return iosp;
+}
+
+/* Factor applied to pest() results before they are displayed. */
+#define E3 1000.0
+/*
+ * Store a fresh set of nlat bucket values for operation op on device
+ * dev/unit, creating the device record if it does not exist yet.  The
+ * values held so far are kept as the previous sample.  Does nothing when
+ * a new record cannot be allocated.
+ */
+static void
+update_dev(const char *dev, int unit, int op, uint64_t *lats, int nlat)
+{
+	struct iosched_op_stat *st;
+	size_t nbytes;
+
+	st = find_dev(dev, unit, op);
+	if (st == NULL) {
+		st = alloc_dev(dev, unit, op);
+		if (st == NULL)
+			return;
+	}
+	st->nlats = nlat;
+	nbytes = nlat * sizeof(uint64_t);
+	/* Order matters: save the old sample before overwriting it. */
+	memcpy(st->prev_lats, st->lats, nbytes);
+	memcpy(st->lats, lats, nbytes);
+}
+
+/*
+ * Walk every sysctl node below base_oid (len components) and feed each
+ * latency histogram found to update_dev().
+ *
+ * A node is used when its name ends in LATENCY, it is a string node, its
+ * value splits into fewer than MAX_LATS comma separated numbers, and the
+ * name has the form CAM_BASE.<dev>.<unit>.<x>.<op>... with <op> one of the
+ * names in ops[].  Anything else is skipped.
+ *
+ * Returns 0 once the walk leaves the subtree or runs out of nodes; exits
+ * via err(3)/errx(3) on unexpected sysctl failures.
+ */
+static int
+walk_sysctl(int *base_oid, size_t len)
+{
+	int qoid[CTL_MAXNAME + 2], oid[CTL_MAXNAME];
+	size_t l1, l2;
+	char name[BUFSIZ];
+
+	/* Not a system call failure, so errno has nothing to add. */
+	if (len > CTL_MAXNAME)
+		errx(1, "Length %zu too long", len);
+
+	qoid[0] = CTL_SYSCTL;
+	qoid[1] = CTL_SYSCTL_NEXT;
+	l1 = 2;
+	memcpy(qoid + 2, base_oid, len * sizeof(int));
+	l1 += len;
+	for (;;) {
+		/*
+		 * Get the next one or return when we get to the end of the
+		 * sysctls in the kernel.
+		 */
+		l2 = sizeof(oid);
+		if (sysctl(qoid, l1, oid, &l2, NULL, 0) != 0) {
+			if (errno == ENOENT)
+				return (0);
+			err(1, "sysctl(getnext) %zu", l2);
+		}
+
+		l2 /= sizeof(int);
+
+		/*
+		 * Bail if we're seeing OIDs that don't have the
+		 * same prefix or can't have the same prefix.
+		 */
+		if (l2 < len ||
+		    memcmp(oid, base_oid, len * sizeof(int)) != 0)
+			return (0);
+
+		/*
+		 * Get the name, validate it's one we're looking for,
+		 * parse the latency and add to list.
+		 */
+		do {
+			int nlat;
+			size_t namelen, l3;
+			char val[BUFSIZ];
+			char *walker, *dev, *unitstr, *opstr;
+			uint64_t latvals[MAX_LATS];
+			u_int kind;
+			int unit, op;
+
+			/* namelen counts the trailing NUL. */
+			namelen = oid2name(oid, l2, name, sizeof(name));
+			if (namelen < strlen(LATENCY) + 1 ||
+			    strcmp(name + namelen - strlen(LATENCY) - 1,
+			    LATENCY) != 0)
+				break;
+			if (oidfmt(oid, l2, &kind) != 0)
+				err(1, "oidfmt");
+			if ((kind & CTLTYPE) != CTLTYPE_STRING)
+				errx(1, "string");
+			/* Leave room for the terminator written below. */
+			l3 = sizeof(val) - 1;
+			if (sysctl(oid, l2, val, &l3, NULL, 0) != 0)
+				err(1, "sysctl");
+			val[l3] = '\0';
+			nlat = nitems(latvals);
+			if (split_u64(val, ",", latvals, &nlat) == 0)
+				break;
+			/* Skip "CAM_BASE." and pick the name apart. */
+			walker = name + strlen(CAM_BASE) + 1;
+			dev = strsep(&walker, ".");
+			unitstr = strsep(&walker, ".");
+			(void)strsep(&walker, ".");
+			opstr = strsep(&walker, ".");
+			/* strsep() yields NULL once the name runs out. */
+			if (dev == NULL || unitstr == NULL || opstr == NULL)
+				break;
+			unit = (int)strtol(unitstr, NULL, 10);
+			op = op2num(opstr);
+			if (op < 0)
+				break;
+			update_dev(dev, unit, op, latvals, nlat);
+		} while (false);
+
+		/* Continue the walk from the node just examined. */
+		memcpy(qoid + 2, oid, l2 * sizeof(int));
+		l1 = 2 + l2;
+	}
+}
+
+/*
+ * Tear down the display window: clear it, push the cleared contents to
+ * the screen and delete it.  A NULL window is ignored.
+ */
+void
+closeiolat(WINDOW *w)
+{
+	if (w != NULL) {
+		wclear(w);
+		wrefresh(w);
+		delwin(w);
+	}
+}
+
+/*
+ * Parse the number that follows the first '=' in cmd and store it in *v.
+ * *v is left untouched when cmd has no '=' or no number follows it.
+ */
+static void
+doublecmd(const char *cmd, double *v)
+{
+	const char *eq = strchr(cmd, '=');
+	double parsed;
+
+	/* XXX Tell the user something when either step fails? */
+	if (eq != NULL && sscanf(eq + 1, "%lf", &parsed) == 1)
+		*v = parsed;
+}
+
+/*
+ * Handle a command typed while the iolat display is active.
+ *
+ * "trim", "read" and "write" toggle the display of that operation and
+ * "color" toggles colouring; for these cmd is passed as the first
+ * argument of prefix().  Commands for which prefix("high", cmd) or
+ * prefix("med", cmd) holds set the matching threshold from the number
+ * following '='.  args is not used.
+ *
+ * Returns 1 after handling the command and relabelling the window, or 0
+ * when the command is not one of the above.
+ */
+int
+cmdiolat(const char *cmd, const char *args __unused)
+{
+	/* Nothing may be written to stderr here: curses owns the terminal. */
+	if (prefix(cmd, "trim"))
+		flags ^= OP_TRIM_MASK;
+	else if (prefix(cmd, "read"))
+		flags ^= OP_READ_MASK;
+	else if (prefix(cmd, "write"))
+		flags ^= OP_WRITE_MASK;
+	else if (prefix(cmd, "color"))
+		docolor = !docolor;
+	else if (prefix("high", cmd))
+		doublecmd(cmd, &high_thresh);
+	else if (prefix("med", cmd))
+		doublecmd(cmd, &med_thresh);
+	else
+		return (0);
+	/* The set of labelled rows may have changed; redraw from scratch. */
+	wclear(wnd);
+	labeliolat();
+	refresh();
+	return (1);
+}
+
+/*
+ * Initialise the iolat display: empty the device list, establish the
+ * histogram base latency and take a first sample of every latency node
+ * below CAM_BASE.  Always returns 1.
+ */
+int
+initiolat(void)
+{
+	int cam[CTL_MAXNAME];
+	/* Zeroed so a reply shorter than 8 bytes cannot leave stale bytes. */
+	uint64_t sbt_base = 0;
+	size_t len = sizeof(sbt_base);
+	int camlen;
+
+	SLIST_INIT(&curlist);
+
+	baselat = 1e-3; /* old default */
+	if (sysctlbyname(CAM_IOSCHED_BASE, &sbt_base, &len, NULL, 0) == 0)
+		baselat = sbt_base * 1e-6; /* microseconds -> seconds */
+
+	/* Without the subtree there is nothing to walk; cam[] is not valid. */
+	camlen = name2oid(CAM_BASE, cam);
+	if (camlen > 0)
+		walk_sysctl(cam, camlen);
+	return (1);
+}
+
+/*
+ * Take a new sample of every latency node below CAM_BASE.  Does nothing
+ * when the subtree cannot be resolved.
+ */
+void
+fetchiolat(void)
+{
+	int cam[CTL_MAXNAME];
+	int camlen;
+
+	/* cam[] is only valid when name2oid() reports a positive length. */
+	camlen = name2oid(CAM_BASE, cam);
+	if (camlen > 0)
+		walk_sysctl(cam, camlen);
+}
+
+/* Columns reserved at the left edge for the operation names. */
+#define INSET 10
+
+/*
+ * Write the name of every enabled operation in column 1, one per line,
+ * starting two lines below row (below the device name and header lines).
+ */
+static void
+label_ops(int row)
+{
+	int line = row + 2;
+
+	if (flags & OP_READ_MASK)
+		mvwaddstr(wnd, line++, 1, "read");
+	if (flags & OP_WRITE_MASK)
+		mvwaddstr(wnd, line++, 1, "write");
+	if (flags & OP_TRIM_MASK)
+		mvwaddstr(wnd, line++, 1, "trim");
+}
+
+/*
+ * Draw the static part of the display: operation names on the left and,
+ * for each device on curlist, its name centred over a percentile header.
+ * Devices are laid out left to right in COLWIDTH wide columns and wrap to
+ * a new band of rows when the window width is used up; drawing stops when
+ * another band would not fit in the window.
+ */
+void
+labeliolat(void)
+{
+	int _col, lpr, row, namelen, pad;
+	struct iosched_stat *isp;
+	char tmpstr[32];
+#define COLWIDTH 29
+	lpr = 2; /* for headers */
+	for (int i = 0; i < NUM_OPS; i++) {
+		if (flags & (1 << i))
+			lpr++;
+	}
+	row = 0;
+	_col = INSET;
+	label_ops(row);
+	SLIST_FOREACH(isp, &curlist, link) {
+		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
+			_col = INSET;
+			row += lpr + 1;
+			if (row > getmaxy(wnd) - 1 - (lpr + 1))
+				break;
+			label_ops(row);
+		}
+		snprintf(tmpstr, sizeof(tmpstr), "%s%d", isp->dev_name, isp->unit);
+		/*
+		 * Centre the name in signed arithmetic: a name wider than
+		 * the column must not wrap around to a huge offset.
+		 */
+		namelen = (int)strlen(tmpstr);
+		pad = namelen < COLWIDTH ? (COLWIDTH - namelen) / 2 : 0;
+		mvwaddstr(wnd, row, _col + pad, tmpstr);
+		mvwaddstr(wnd, row + 1, _col, " p50 p90 p99 p99.9");
+		_col += COLWIDTH;
+	}
+}
+
+/*
+ * Create the display window as a sub-window of stdscr that starts at
+ * MAINWIN_ROW and is LINES-3-1 lines tall.  Returns whatever subwin()
+ * returns.
+ */
+WINDOW *
+openiolat(void)
+{
+	WINDOW *w;
+
+	w = subwin(stdscr, LINES-3-1, 0, MAINWIN_ROW, 0);
+	return (w);
+}
+
+/*
+ * Format f into buf (len bytes) using a six character wide field whose
+ * precision shrinks as the value grows: three decimals below 10, two
+ * below 100, one below 1000 and none from 1000 up.  A NaN is shown as a
+ * dash.
+ */
+static void
+fmt(float f, char *buf, size_t len)
+{
+	int prec;
+
+	if (isnan(f)) {
+		strlcpy(buf, " - ", len);
+		return;
+	}
+	if (f >= 1000.0) {
+		snprintf(buf, len, "%6d", (int)f);
+		return;
+	}
+	if (f >= 100.0)
+		prec = 1;
+	else if (f >= 10.0)
+		prec = 2;
+	else
+		prec = 3;
+	snprintf(buf, len, "%6.*f", prec, f);
+}
+
+/*
+ * Write one latency value at window position (y, x).
+ *
+ * When colouring is enabled the value is drawn with colour pair 4 for a
+ * NaN, 3 above high_thresh, 2 above med_thresh and 1 otherwise.
+ */
+static void
+latout(double lat, int y, int x)
+{
+	char text[32];
+	int pair;
+
+	fmt(lat, text, sizeof(text));
+	if (isnan(lat))
+		pair = 4;
+	else
+		pair = lat > high_thresh ? 3 : (lat > med_thresh ? 2 : 1);
+	if (!docolor) {
+		mvwaddstr(wnd, y, x, text);
+		return;
+	}
+	wattron(wnd, COLOR_PAIR(pair));
+	mvwaddstr(wnd, y, x, text);
+	wattroff(wnd, COLOR_PAIR(pair));
+}
+
+/*
+ * Draw the latency figures for every device on curlist.
+ *
+ * The layout mirrors labeliolat(): devices occupy COLWIDTH wide columns
+ * starting at INSET and wrap to a new band of rows when the window width
+ * is used up.  For each enabled operation the p50, p90, p99 and p99.9
+ * estimates are computed from the difference between the two most recent
+ * samples and written on the operation's line.
+ */
+void
+showiolat(void)
+{
+	int _col, lpr, row, k;
+	struct iosched_stat *isp;
+	struct iosched_op_stat *iosp;
+#define COLWIDTH 29
+	lpr = 2; /* device name and header lines */
+	for (int i = 0; i < NUM_OPS; i++) {
+		if (flags & (1 << i))
+			lpr++;
+	}
+	row = 0;
+	_col = INSET;
+	SLIST_FOREACH(isp, &curlist, link) {
+		if (_col + COLWIDTH >= getmaxx(wnd) - 1 - INSET) {
+			_col = INSET;
+			row += lpr + 1;
+			if (row > getmaxy(wnd) - 1 - (lpr + 1))
+				break;
+		}
+		k = 2;
+		for (int i = 0; i < NUM_OPS; i++) {
+			uint64_t lats[MAX_LATS];
+			int nlats;
+			float p50, p90, p99, p999;
+
+			if ((flags & (1 << i)) == 0)
+				continue;
+			iosp = &isp->op_stats[i];
+			/* Never index past the local array. */
+			nlats = iosp->nlats;
+			if (nlats < 0)
+				nlats = 0;
+			else if (nlats > MAX_LATS)
+				nlats = MAX_LATS;
+			memset(lats, 0, sizeof(lats));
+			/* Counts accumulated since the previous sample. */
+			for (int j = 0; j < nlats; j++)
+				lats[j] = iosp->lats[j] - iosp->prev_lats[j];
+			p50 = pest(500, lats, nlats) * E3;
+			p90 = pest(900, lats, nlats) * E3;
+			p99 = pest(990, lats, nlats) * E3;
+			p999 = pest(999, lats, nlats) * E3;
+			latout(p50, row + k, _col);
+			latout(p90, row + k, _col + 7);
+			latout(p99, row + k, _col + 14);
+			latout(p999, row + k, _col + 21);
+			k++;
+		}
+		_col += COLWIDTH;
+	}
+}
diff --git a/usr.bin/systat/main.c b/usr.bin/systat/main.c
--- a/usr.bin/systat/main.c
+++ b/usr.bin/systat/main.c
@@ -155,6 +155,7 @@
char errbuf[_POSIX2_LINE_MAX], dummy;
size_t size;
struct cmdentry *cmd = NULL;
+ short cf, cb;
(void) setlocale(LC_ALL, "");
@@ -214,6 +215,13 @@
* routines to minimize update work by curses.
*/
initscr();
+ start_color();
+ use_default_colors();
+ pair_content(0, &cf, &cb);
+ init_pair(1, COLOR_GREEN, cb);
+ init_pair(2, COLOR_MAGENTA, cb);
+ init_pair(3, COLOR_RED, cb);
+ init_pair(4, COLOR_BLUE, cb);
CMDLINE = LINES - 1;
wnd = (*curcmd->c_open)();
if (wnd == NULL) {
diff --git a/usr.bin/systat/systat.1 b/usr.bin/systat/systat.1
--- a/usr.bin/systat/systat.1
+++ b/usr.bin/systat/systat.1
@@ -88,6 +88,7 @@
.Ic icmp ,
.Ic icmp6 ,
.Ic ifstat ,
+.Ic iolat ,
.Ic iostat ,
.Ic ip ,
.Ic ip6 ,
@@ -244,6 +245,59 @@
Like
.Ic icmp ,
but with TCP statistics.
+.It Ic iolat
+Display statistics describing the hardware latencies of I/O operations as
+computed by the
+.Va CAM_IOSCHED_DYNAMIC
+option.
+This option must be in the kernel config file of the running kernel for this
+display to work.
+All devices are displayed as there is currently no way to filter them.
+The statistics displayed for the I/O latencies are the percentiles with
+sufficient data during the polling interval to compute.
+If a value cannot be estimated ``-'' is displayed.
+The P50 (also known as the median), P90, P99 and P99.9 values are computed only
+if at least 4, 20, 200 or 2000 operations, respectively, occurred during the
+polling interval.
+The latencies reported are hardware latencies and do not include any software
+queueing time.
+The latencies are estimated based on histogram data computed by the CAM I/O
+scheduler and represent estimates of the actual value that are only good to
+two or three significant digits.
+The display of latency changes based on the scale of the latency to reflect
+the precision of the estimates and to fit on the available screen space.
+All latencies are reported in milliseconds.
+When color is enabled:
+.Bl -bullet
+.It
+Values at or below the medium latency threshold are displayed in green.
+.It
+Values between the medium latency and high latency thresholds are displayed
+in magenta.
+.It
+Values above the high latency threshold are displayed in red.
+.El
+.Pp
+When color is disabled, the default foreground and background colors are always
+used.
+.Pp
+The following commands are specific to the
+.Ic iolat
+display; the minimum unambiguous prefix may be supplied.
+.Pp
+.Bl -tag -width Fl -compact
+.It Cm color
+Toggle the use of color in the display.
+The default is on.
+.It Cm high=XXX
+Set the high latency threshold to XXX milliseconds.
+.It Cm med=XXX
+Set the medium latency threshold to XXX milliseconds.
+.It Cm read
+Toggle the display of statistics about read operations.
+The default is on.
+.It Cm write
+Toggle the display of statistics about write operations.
+The default is on.
+.It Cm trim
+Toggle the display of statistics about trim operations.
+The default is on.
+.El
.It Ic iostat
Display, in the lower window, statistics about processor use
and disk throughput.
@@ -682,3 +736,9 @@
.Ic vmstat
display looks out of place because it is (it was added in as
a separate display rather than created as a new program).
+The
+.Ic iolat
+command doesn't implement the common device commands including
+filtering, as it doesn't use the
+.Xr devstat 3
+mechanism to obtain its statistics.

File Metadata

Mime Type
text/plain
Expires
Thu, Feb 6, 2:23 AM (21 h, 33 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16485234
Default Alt Text
D34259.diff (16 KB)

Event Timeline